diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000000..97c8c97fe3 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,20 @@ +{ + "name": "nfcore", + "image": "nfcore/devcontainer:latest", + + "remoteUser": "root", + "privileged": true, + + "remoteEnv": { + // Workspace path on the host for mounting with docker-outside-of-docker + "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" + }, + + "onCreateCommand": "./.devcontainer/setup.sh", + + "hostRequirements": { + "cpus": 4, + "memory": "16gb", + "storage": "32gb" + } +} diff --git a/.devcontainer/setup.sh b/.devcontainer/setup.sh new file mode 100755 index 0000000000..a5af98b254 --- /dev/null +++ b/.devcontainer/setup.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# Customise the terminal command prompt +echo "export PROMPT_DIRTRIM=2" >> $HOME/.bashrc +echo "export PS1='\[\e[3;36m\]\w ->\[\e[0m\\] '" >> $HOME/.bashrc +export PROMPT_DIRTRIM=2 +export PS1='\[\e[3;36m\]\w ->\[\e[0m\\] ' + +# Update Nextflow +nextflow self-update + +# Update welcome message +echo "Welcome to the nf-core/sarek devcontainer!" 
> /usr/local/etc/vscode-dev-containers/first-run-notice.txt diff --git a/.editorconfig b/.editorconfig deleted file mode 100644 index 95549501a2..0000000000 --- a/.editorconfig +++ /dev/null @@ -1,27 +0,0 @@ -root = true - -[*] -charset = utf-8 -end_of_line = lf -insert_final_newline = true -trim_trailing_whitespace = true -indent_size = 4 -indent_style = space - -[*.{yml,yaml}] -indent_size = 2 - -[*.json] -insert_final_newline = unset - -# These files are edited and tested upstream in nf-core/modules -[/modules/nf-core/**] -charset = unset -end_of_line = unset -insert_final_newline = unset -trim_trailing_whitespace = unset -indent_style = unset -indent_size = unset - -[/assets/email*] -indent_size = unset diff --git a/.gitattributes b/.gitattributes index 050bb12035..7a2dabc293 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ *.config linguist-language=nextflow +*.nf.test linguist-language=nextflow modules/nf-core/** linguist-generated subworkflows/nf-core/** linguist-generated diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 1fdc8e3066..903e7aa141 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,2 +1,3 @@ -* @FriederikeHanssen -* @maxulysse +* @FriederikeHanssen @maxulysse +*.nf.test* @nf-core/nf-test +.github/workflows/ @nf-core/a-team diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index f7c3699d43..ebbeff3672 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -1,4 +1,4 @@ -# nf-core/sarek: Contributing Guidelines +# `nf-core/sarek`: Contributing Guidelines Hi there! Many thanks for taking an interest in improving nf-core/sarek. @@ -9,23 +9,29 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! 
Contributions to the code are even more welcome ;) +> [!NOTE] > If you need help using or modifying nf-core/sarek then the best place to ask is on the nf-core Slack [#sarek](https://nfcore.slack.com/channels/sarek) channel ([join our Slack here](https://nf-co.re/join/slack)). ## Contribution workflow If you'd like to write some code for nf-core/sarek, the standard workflow is as follows: -1. Check that there isn't already an issue about your idea in the [nf-core/sarek issues](https://github.com/nf-core/sarek/issues) to avoid duplicating work - * If there isn't one already, please create one so that others know you're working on this +1. Check that there isn't already an issue about your idea in the [nf-core/sarek issues](https://github.com/nf-core/sarek/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this 2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/sarek repository](https://github.com/nf-core/sarek) to your GitHub account 3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) -4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). +4. Use `nf-core pipelines schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). 5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). ## Tests +You have the option to test your changes locally by running the pipeline. 
For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nf-test test --profile debug,test,docker --verbose +``` + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. @@ -34,7 +40,7 @@ There are typically two types of tests that run: ### Lint tests `nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. -To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint <pipeline-directory>` command. +To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core pipelines lint <pipeline-directory>` command. If any failures or warnings are encountered, please follow the listed URL for more documentation. @@ -49,9 +55,9 @@ These tests are run both with the latest available version of `Nextflow` and als :warning: Only in the unlikely and regretful event of a release happening with a bug. -* On your own fork, make a new branch `patch` based on `upstream/master`. -* Fix the bug, and bump version (X.Y.Z+1). -* A PR should be made on `master` from patch to directly this particular bug. +- On your own fork, make a new branch `patch` based on `upstream/main` or `upstream/master`. +- Fix the bug, and bump version (X.Y.Z+1). +- Open a pull-request from `patch` to `main`/`master` with the changes. 
## Getting help @@ -59,46 +65,61 @@ For further information/help, please consult the [nf-core/sarek documentation](h ## Pipeline contribution conventions -To make the nf-core/sarek code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written. +To make the `nf-core/sarek` code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written. ### Adding a new step If you wish to contribute a new step, please use the following coding standards: -1. Define the corresponding input channel into your new process from the expected previous process channel +1. Define the corresponding input channel into your new process from the expected previous process channel. 2. Write the process block (see below). 3. Define the output channel if needed (see below). 4. Add any new parameters to `nextflow.config` with a default (see below). -5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core schema build` tool). +5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core pipelines schema build` tool). 6. Add sanity checks and validation for all relevant parameters. 7. Perform local tests to validate that the new code works as expected. -8. If applicable, add a new test command in `.github/workflow/ci.yml`. -9. Update MultiQC config `assets/multiqc_config.yaml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://https://multiqc.info/) module. +8. If applicable, add a new test in the `tests` directory. +9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://multiqc.info/) module. 10. 
Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. ### Default values -Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope. +Parameters should be initialised / defined with default values within the `params` scope in `nextflow.config`. -Once there, use `nf-core schema build` to add to `nextflow_schema.json`. +Once there, use `nf-core pipelines schema build` to add to `nextflow_schema.json`. ### Default processes resource requirements -Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. +Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/main/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. 
-The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block. +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. ### Naming schemes Please use the following naming schemes, to make it easy to understand what is going where. -* initial process channel: `ch_output_from_<process>` -* intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>` +- initial process channel: `ch_output_from_<process>` +- intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>` ### Nextflow version bumping -If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]` +If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core pipelines bump-version --nextflow . [min-nf-version]` ### Images and figures For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. 
+ +To get started: + +- Open the repo in [Codespaces](https://github.com/nf-core/sarek/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 2607f9286a..ff6f0cfaa1 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -1,9 +1,7 @@ - name: Bug report description: Report something that is broken or incorrect labels: bug body: - - type: markdown attributes: value: | @@ -11,7 +9,6 @@ body: - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) - [nf-core/sarek pipeline documentation](https://nf-co.re/sarek/usage) - - type: textarea id: description attributes: @@ -44,9 +41,9 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 21.10.3)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ * OS _(eg. CentOS Linux, macOS, Linux Mint)_ * Version of nf-core/sarek _(eg. 1.1, 1.5, 1.8.2)_ diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index c27f6deb10..a52619f09f 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -10,16 +10,16 @@ Remember that PRs should be made against the dev branch, unless you're preparing Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/sarek/tree/master/.github/CONTRIBUTING.md) --> - ## PR checklist - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! 
- - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/sarek/tree/master/.github/CONTRIBUTING.md) - - [ ] If necessary, also make a PR on the nf-core/sarek _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. -- [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`). +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/sarek/tree/master/.github/CONTRIBUTING.md) +- [ ] If necessary, also make a PR on the nf-core/sarek _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] Make sure your code lints (`nf-core pipelines lint`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. diff --git a/.github/RELEASE_CHECKLIST.md b/.github/RELEASE_CHECKLIST.md index c7c20f86b2..1bd412973e 100644 --- a/.github/RELEASE_CHECKLIST.md +++ b/.github/RELEASE_CHECKLIST.md @@ -3,26 +3,22 @@ > This checklist is for our own reference, to help us prepare a new release 1. 
Check that everything is ready to go - - Desired [PRs](https://github.com/nf-core/sarek/pulls) are merged - - [GitHub Actions](https://github.com/nf-core/sarek/actions?query=workflow%3A%22nf-core+CI%22) are passing on `dev` - - [nf-core linting](https://github.com/nf-core/sarek/actions?query=workflow%3A%22nf-core+linting%22) are passing on `dev` + +- Desired [PRs](https://github.com/nf-core/sarek/pulls) are merged +- [GHA CI](https://github.com/nf-core/sarek/actions/workflows/ci.yml) are passing on `dev` +- [GHA linting](https://github.com/nf-core/sarek/actions/workflows/linting.yml) are passing on `dev` + 2. Increase version number following [semantic versioning](http://semver.org/spec/v2.0.0.html) 3. Choose an appropriate codename for the release (if major or minor) - - i.e. Peaks in [Sarek National Park](https://en.wikipedia.org/wiki/Sarek_National_Park#Topography) + +- i.e. Peaks in [Sarek National Park](https://en.wikipedia.org/wiki/Sarek_National_Park#Topography) + 4. Sync `dev` and checkout a new branch for the release 5. Bump version: - - `nf-core bump-version . ` - - edit `.circleci/config.yml` - - edit `.github/workflows/ci.yml` - - edit `conf/base.config` - - edit `conf/test.config` - - edit `containers/snpeff/Dockerfile` - - edit `containers/snpeff/environment.yml` - - edit `containers/vep/Dockerfile` - - edit `containers/vep/environment.yml` - - edit `docs/images/sarek_workflow.svg` - - generate a new `docs/images/sarek_workflow.png` - - edit `CHANGELOG` + +- `nf-core bump-version . ` +- edit `CHANGELOG` + 6. Make a PR to `master` 7. Wait for reviews 8. Merge said PR @@ -31,15 +27,21 @@ 11. RT the nf-core automated tweet about the new released version 12. Make a new branch from `dev` 13. Checkout the `CHANGELOG.md` from `master` - - `git checkout upstream/master -- CHANGELOG.md` + +- `git checkout upstream/master -- CHANGELOG.md` + 14. Add a new `Unreleased` section in `CHANGELOG.md` for the `dev` version -15. 
Checkout `docs/images/sarek_workflow.svg` and `docs/images/sarek_workflow.pnh` from `master` - - `git checkout upstream/master -- docs/images/sarek_workflow.svg` - - `git checkout upstream/master -- docs/images/sarek_workflow.png` +15. Checkout figures from `master` + +- `git checkout upstream/master -- docs/images/sarek_indices_subway.svg` +- `git checkout upstream/master -- docs/images/sarek_indices_subway.png` +- `git checkout upstream/master -- docs/images/sarek_subway.svg` +- `git checkout upstream/master -- docs/images/sarek_subway.png` +- `git checkout upstream/master -- docs/images/sarek_workflow.svg` +- `git checkout upstream/master -- docs/images/sarek_workflow.png` + 16. Make a PR to `dev` 17. Wait for review 18. Merge said PR -19. Download all new containers to `/sw/data/uppnex/ToolBox/nf-core` on `rackham` -20. Download newest `nf-core/sarek` to `/data1/containers` on `munin` -21. Commit and push. Continue making more awesome :metal: -22. Have fika :cake: +19. Commit and push. Continue making more awesome :metal: +20. 
Have fika :cake: diff --git a/.github/actions/get-shards/action.yml b/.github/actions/get-shards/action.yml new file mode 100644 index 0000000000..34085279f8 --- /dev/null +++ b/.github/actions/get-shards/action.yml @@ -0,0 +1,69 @@ +name: "Get number of shards" +description: "Get the number of nf-test shards for the current CI job" +inputs: + max_shards: + description: "Maximum number of shards allowed" + required: true + paths: + description: "Component paths to test" + required: false + tags: + description: "Tags to pass as argument for nf-test --tag parameter" + required: false +outputs: + shard: + description: "Array of shard numbers" + value: ${{ steps.shards.outputs.shard }} + total_shards: + description: "Total number of shards" + value: ${{ steps.shards.outputs.total_shards }} +runs: + using: "composite" + steps: + - name: Install nf-test + uses: nf-core/setup-nf-test@v1 + with: + version: ${{ env.NFT_VER }} + - name: Get number of shards + id: shards + shell: bash + run: | + # Run nf-test with dynamic parameter + nftest_output=$(nf-test test \ + --profile +docker \ + $(if [ -n "${{ inputs.tags }}" ]; then echo "--tag ${{ inputs.tags }}"; fi) \ + --dry-run \ + --ci \ + --changed-since HEAD^) || { + echo "nf-test command failed with exit code $?" + echo "Full output: $nftest_output" + exit 1 + } + echo "nf-test dry-run output: $nftest_output" + + # Default values for shard and total_shards + shard="[]" + total_shards=0 + + # Check if there are related tests + if echo "$nftest_output" | grep -q 'No tests to execute'; then + echo "No related tests found." + else + # Extract the number of related tests + number_of_shards=$(echo "$nftest_output" | sed -n 's|.*Executed \([0-9]*\) tests.*|\1|p') + if [[ -n "$number_of_shards" && "$number_of_shards" -gt 0 ]]; then + shards_to_run=$(( $number_of_shards < ${{ inputs.max_shards }} ? $number_of_shards : ${{ inputs.max_shards }} )) + shard=$(seq 1 "$shards_to_run" | jq -R . | jq -c -s .) 
+ total_shards="$shards_to_run" + else + echo "Unexpected output format. Falling back to default values." + fi + fi + + # Write to GitHub Actions outputs + echo "shard=$shard" >> $GITHUB_OUTPUT + echo "total_shards=$total_shards" >> $GITHUB_OUTPUT + + # Debugging output + echo "Final shard array: $shard" + echo "Total number of shards: $total_shards" diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml new file mode 100644 index 0000000000..0ad890dc3e --- /dev/null +++ b/.github/actions/nf-test/action.yml @@ -0,0 +1,121 @@ +name: "nf-test Action" +description: "Runs nf-test with common setup steps" +inputs: + profile: + description: "Profile to use" + required: true + shard: + description: "Shard number for this CI job" + required: true + total_shards: + description: "Total number of test shards(NOT the total number of matrix jobs)" + required: true + paths: + description: "Test paths" + required: true + tags: + description: "Tags to pass as argument for nf-test --tag parameter" + required: false +runs: + using: "composite" + steps: + - name: Setup Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ env.NXF_VERSION }}" + + - name: Set up Python + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 + with: + python-version: "3.14" + + - name: Install nf-test + uses: nf-core/setup-nf-test@v1 + with: + version: "${{ env.NFT_VER }}" + install-pdiff: true + + - name: Setup apptainer + if: contains(inputs.profile, 'singularity') + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + if: contains(inputs.profile, 'singularity') + shell: bash + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Conda setup + if: contains(inputs.profile, 'conda') + uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3 + with: + auto-update-conda: true + conda-solver: libmamba + channels: conda-forge,bioconda + channel-priority: 
strict + conda-remove-defaults: true + + # Set up secrets + - name: Set up Nextflow secrets + if: env.SENTIEON_ENCRYPTION_KEY != '' && env.SENTIEON_LICENSE_MESSAGE != '' + shell: bash + run: | + python -m pip install cryptography + nextflow secrets set SENTIEON_AUTH_DATA $(python3 bin/license_message.py encrypt --key "$SENTIEON_ENCRYPTION_KEY" --message "$SENTIEON_LICENSE_MESSAGE") + + - name: Run nf-test + shell: bash + env: + NFT_WORKDIR: ${{ env.NFT_WORKDIR }} + SENTIEON_LICSRVR_IP: ${{ env.SENTIEON_LICSRVR_IP }} + SENTIEON_AUTH_MECH: "GitHub Actions - token" + run: | + nf-test test \ + --profile=+${{ inputs.profile }} \ + $(if [ -n "${{ inputs.tags }}" ]; then echo "--tag ${{ inputs.tags }}"; fi) \ + --ci \ + --changed-since HEAD^ \ + --verbose \ + --tap=test.tap \ + --shard ${{ inputs.shard }}/${{ inputs.total_shards }} + + # Save the absolute path of the test.tap file to the output + echo "tap_file_path=$(realpath test.tap)" >> $GITHUB_OUTPUT + + - name: Generate test summary + if: always() + shell: bash + run: | + # Add header if it doesn't exist (using a token file to track this) + if [ ! 
-f ".summary_header" ]; then + echo "# 🚀 nf-test results" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Status | Test Name | Profile | Shard |" >> $GITHUB_STEP_SUMMARY + echo "|:------:|-----------|---------|-------|" >> $GITHUB_STEP_SUMMARY + touch .summary_header + fi + + if [ -f test.tap ]; then + while IFS= read -r line; do + if [[ $line =~ ^ok ]]; then + test_name="${line#ok }" + # Remove the test number from the beginning + test_name="${test_name#* }" + echo "| ✅ | ${test_name} | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + elif [[ $line =~ ^not\ ok ]]; then + test_name="${line#not ok }" + # Remove the test number from the beginning + test_name="${test_name#* }" + echo "| ❌ | ${test_name} | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + fi + done < test.tap + else + echo "| ⚠️ | No test results found | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + fi + + - name: Clean up + if: always() + shell: bash + run: | + sudo rm -rf /home/ubuntu/tests/ diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml deleted file mode 100644 index 97e0356171..0000000000 --- a/.github/workflows/awsfulltest.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: nf-core AWS full size tests -# This workflow is triggered on published releases. -# It can be additionally triggered manually with GitHub actions workflow dispatch button. 
-# It runs the -profile 'test_full' on AWS batch - -on: - release: - types: [published] - workflow_dispatch: -jobs: - run-tower: - name: Run AWS full tests - if: github.repository == 'nf-core/sarek' - runs-on: ubuntu-latest - steps: - - name: Launch workflow via tower - uses: nf-core/tower-action@v2 - # TODO nf-core: You can customise AWS full pipeline tests as required - # Add full size test data (but still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters - - with: - workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} - access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - pipeline: ${{ github.repository }} - revision: ${{ github.sha }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/sarek/work-${{ github.sha }} - parameters: | - { - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/sarek/results-${{ github.sha }}" - } - profiles: test_full,aws_tower - pre_run_script: 'export NXF_VER=21.10.3' diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml deleted file mode 100644 index 60c5d6b971..0000000000 --- a/.github/workflows/awstest.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: nf-core AWS test -# This workflow can be triggered manually with the GitHub actions workflow dispatch button. 
-# It runs the -profile 'test' on AWS batch - -on: - workflow_dispatch: -jobs: - run-tower: - name: Run AWS tests - if: github.repository == 'nf-core/sarek' - runs-on: ubuntu-latest - steps: - - name: Launch workflow via tower - uses: nf-core/tower-action@v2 - - with: - workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} - access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - pipeline: ${{ github.repository }} - revision: ${{ github.sha }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/sarek/work-${{ github.sha }} - parameters: | - { - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/sarek/results-test-${{ github.sha }}" - } - profiles: test,aws_tower - pre_run_script: 'export NXF_VER=21.10.3' diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index d1d6ea382e..0bb9eb5738 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -1,29 +1,30 @@ name: nf-core branch protection -# This workflow is triggered on PRs to master branch on the repository -# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` +# This workflow is triggered on PRs to `main`/`master` branch on the repository +# It fails when someone tries to make a PR against the nf-core `main`/`master` branch instead of `dev` on: pull_request_target: - branches: [master] + branches: + - main + - master jobs: test: runs-on: ubuntu-latest steps: - # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches + # PRs to the nf-core repo main/master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches - name: Check PRs if: github.repository == 'nf-core/sarek' run: | - { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/sarek ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] - + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/sarek ]] && [[ 
$GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@v1 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | - ## This PR is against the `master` branch :x: + ## This PR is against the `${{github.event.pull_request.base.ref}}` branch :x: * Do not close this PR * Click _Edit_ and change the `base` to `dev` @@ -33,9 +34,9 @@ jobs: Hi @${{ github.event.pull_request.user.login }}, - It looks like this pull-request is has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch. - The `master` branch on nf-core repositories should always contain code from the latest release. - Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. + It looks like this pull-request is has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) ${{github.event.pull_request.base.ref}} branch. + The ${{github.event.pull_request.base.ref}} branch on nf-core repositories should always contain code from the latest release. + Because of this, PRs to ${{github.event.pull_request.base.ref}} are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. 
Note that even after this, the test will continue to show as failing until you push a new commit. @@ -43,4 +44,3 @@ jobs: Thanks again for your contribution! repo-token: ${{ secrets.GITHUB_TOKEN }} allow-repeats: false - diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index a42f9a54c6..0000000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,85 +0,0 @@ -name: nf-core CI -# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors -on: - push: - branches: - - dev - pull_request: - release: - types: [published] - -env: - NXF_ANSI_LOG: false - CAPSULE_LOG: none - -jobs: - test: - name: Run workflow tests - # Only run on push if this is the nf-core dev branch (merged PRs) - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/sarek') }} - runs-on: ubuntu-latest - strategy: - # HACK Remove after DSL2 rewrite is done - fail-fast: false - matrix: - # Nextflow versions - include: - # Test pipeline minimum Nextflow version - - NXF_VER: '21.10.3' - NXF_EDGE: '' - # Test latest edge release of Nextflow - - NXF_VER: '' - NXF_EDGE: '1' - test: - - 'aligner' - - 'annotation' - - 'default' - - 'split_fastq' - - 'gatk4_spark' - #- 'save_bam_mapped' - - 'skip_markduplicates' - - 'targeted' - - 'tumor_normal_pair' - - 'variant_calling' - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - env: - NXF_VER: ${{ matrix.NXF_VER }} - # Uncomment only if the edge release is more recent than the latest stable release - # See https://github.com/nextflow-io/nextflow/issues/2467 - # NXF_EDGE: ${{ matrix.NXF_EDGE }} - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - nextflow self-update - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.x' - - name: Install dependencies - run: python -m pip install --upgrade pip pytest-workflow - - - 
name: Run pipeline with tests settings - run: pytest --tag ${{ matrix.test }} --kwdof - - - name: Output log on failure - if: failure() - run: | - sudo apt install bat > /dev/null - batcat --decorations=always --color=always /home/runner/pytest_workflow_*/*/log.{out,err} - - - name: Upload logs on failure - if: failure() - uses: actions/upload-artifact@v2 - with: - name: logs-${{ matrix.profile }} - path: | - /home/runner/pytest_workflow_*/*/.nextflow.log - /home/runner/pytest_workflow_*/*/log.out - /home/runner/pytest_workflow_*/*/log.err - /home/runner/pytest_workflow_*/*/work - !/home/runner/pytest_workflow_*/*/work/conda - !/home/runner/pytest_workflow_*/*/work/singularity diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 0000000000..6adb0fff4b --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." 
+ days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/cloudtest.yml b/.github/workflows/cloudtest.yml new file mode 100644 index 0000000000..53d6e5e295 --- /dev/null +++ b/.github/workflows/cloudtest.yml @@ -0,0 +1,149 @@ +name: nf-core cloud tests (both full size and smaller) +# This workflow is triggered on PRs opened against the main/master branch. +# It can be additionally triggered manually with GitHub actions workflow dispatch button. +# It runs the -profile 'test_full' on cloud +# or the -profile 'test' on cloud (smaller tests) + +on: + pull_request_review: + types: [submitted] + release: + types: [published] + workflow_dispatch: + inputs: + test: + description: "-profile test (smaller)" + type: boolean + default: true + somatic: + description: "Somatic full test" + type: boolean + default: false + germline: + description: "Germline full test" + type: boolean + default: false + germline_ncbench_agilent: + description: "Germline Agilent NCBench test" + type: boolean + default: false + +jobs: + setup-matrix: + name: Build test matrix + if: github.repository == 'nf-core/sarek' && (github.event.review.state == 'approved' && (github.event.pull_request.base.ref == 'master' || github.event.pull_request.base.ref == 'main') || github.event_name == 'workflow_dispatch' || github.event_name == 'release') + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.build-matrix.outputs.matrix }} + steps: + - name: Build matrix from inputs + id: build-matrix + run: | + # All available profiles + all='[ + { + "profile": "test_aws", + "test": "test", + "compute_env": "TOWER_COMPUTE_ENV", + "workdir": "TOWER_BUCKET_AWS" + }, + { + "profile": "test_full_aws", + "test": "somatic", + "compute_env": "TOWER_COMPUTE_ENV", + "workdir": "TOWER_BUCKET_AWS" + }, + { + "profile": 
"test_full_germline_aws", + "test": "germline", + "compute_env": "TOWER_COMPUTE_ENV", + "workdir": "TOWER_BUCKET_AWS" + }, + { + "profile": "test_full_germline_ncbench_agilent", + "test": "germline_ncbench_agilent", + "compute_env": "TOWER_COMPUTE_ENV", + "workdir": "TOWER_BUCKET_AWS" + } + ]' + + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + # Filter to only selected profiles + matrix=$(echo "$all" | jq -c '[.[] | select( + (.test == "test" and ${{ inputs.test }}) or + (.test == "somatic" and ${{ inputs.somatic }}) or + (.test == "germline" and ${{ inputs.germline }}) or + (.test == "germline_ncbench_agilent" and ${{ inputs.germline_ncbench_agilent }}) + )]') + else + # PR review or release: run all profiles + matrix=$(echo "$all" | jq -c '.') + fi + + echo "matrix={\"include\":$matrix}" >> "$GITHUB_OUTPUT" + + run-platform: + needs: setup-matrix + if: fromJson(needs.setup-matrix.outputs.matrix).include[0] != null + strategy: + fail-fast: false + matrix: ${{ fromJson(needs.setup-matrix.outputs.matrix) }} + name: Run ${{ matrix.profile }} + runs-on: ubuntu-latest + steps: + - name: Set revision variable + id: revision + run: | + echo "revision=${{ (github.event_name == 'workflow_dispatch' || github.event_name == 'release') && github.sha || 'dev' }}" >> "$GITHUB_OUTPUT" + + - name: Launch workflow via Seqera Platform + id: tower-launch + uses: seqeralabs/action-tower-launch@v2 + with: + run_name: sarek_${{ matrix.profile }} + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets[matrix.compute_env] }} + revision: ${{ steps.revision.outputs.revision }} + workdir: ${{ secrets[matrix.workdir] }}/work/sarek/work-${{ steps.revision.outputs.revision }}/${{ matrix.profile }} + parameters: | + { + "outdir": "${{ secrets[matrix.workdir] }}/sarek/results-${{ steps.revision.outputs.revision }}/${{ matrix.profile }}/" + } + nextflow_config: | + plugins { + id 'nf-slack@0.5.0' + } + slack 
{ + enabled = true + bot { + token = '${{ secrets.NFSLACK_BOT_TOKEN }}' + channel = 'sarek_dev' + } + onStart { + enabled = false + } + onComplete { + message = ':white_check_mark: *sarek/${{ matrix.profile }}* completed successfully! :tada:' + } + onError { + message = ':x: *sarek/${{ matrix.profile }}* failed :crying_cat_face:' + } + } + profiles: ${{ matrix.profile }} + + - name: Comment Platform link on PR + if: github.event_name == 'pull_request_review' + env: + GH_TOKEN: ${{ github.token }} + run: | + gh pr comment ${{ github.event.pull_request.number }} \ + --repo ${{ github.repository }} \ + --body "Cloud test **${{ matrix.profile }}** launched on [Seqera Platform](${{ steps.tower-launch.outputs.workflowUrl }})" + + - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 + with: + name: Seqera Platform debug log file - ${{ matrix.profile }} + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 0000000000..45884ff900 --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,134 @@ +name: Test successful pipeline download with 'nf-core pipelines download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to main/master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core pipelines download." 
+ required: true + default: "dev" + pull_request: + branches: + - main + - master + +env: + NXF_ANSI_LOG: false + +jobs: + configure: + runs-on: ubuntu-latest + outputs: + REPO_LOWERCASE: ${{ steps.get_repo_properties.outputs.REPO_LOWERCASE }} + REPOTITLE_LOWERCASE: ${{ steps.get_repo_properties.outputs.REPOTITLE_LOWERCASE }} + REPO_BRANCH: ${{ steps.get_repo_properties.outputs.REPO_BRANCH }} + steps: + - name: Get the repository name and current branch + id: get_repo_properties + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> "$GITHUB_OUTPUT" + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> "$GITHUB_OUTPUT" + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> "$GITHUB_OUTPUT" + + download: + runs-on: ubuntu-latest + needs: configure + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 + with: + python-version: "3.14" + architecture: "x64" + + - name: Setup Apptainer + uses: eWaterCycle/setup-apptainer@4bb22c52d4f63406c49e94c804632975787312b3 # v2.0.0 + with: + apptainer-version: 1.3.4 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git + + - name: Make a cache directory for the container images + run: | + mkdir -p ./singularity_container_images + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images + run: | + nf-core pipelines download ${{ needs.configure.outputs.REPO_LOWERCASE }} \ + --revision ${{ needs.configure.outputs.REPO_BRANCH }} \ + --outdir ./${{ needs.configure.outputs.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "community.wave.seqera.io/library/" \ + --container-cache-utilisation 
'amend' \ + --download-configuration 'yes' + + - name: Inspect download + run: tree ./${{ needs.configure.outputs.REPOTITLE_LOWERCASE }} + + - name: Inspect container images + run: tree ./singularity_container_images | tee ./container_initial + + - name: Count the downloaded number of container images + id: count_initial + run: | + image_count=$(ls -1 ./singularity_container_images | wc -l | xargs) + echo "Initial container image count: $image_count" + echo "IMAGE_COUNT_INITIAL=$image_count" >> "$GITHUB_OUTPUT" + + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true + env: + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{needs.configure.outputs.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ needs.configure.outputs.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not supported) + id: run_pipeline + if: ${{ steps.stub_run_pipeline.outcome == 'failure' }} + env: + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ needs.configure.outputs.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ needs.configure.outputs.REPO_BRANCH }}) -profile test,singularity --outdir ./results + + - name: Count the downloaded number of container images + id: count_afterwards + run: | + image_count=$(ls -1 ./singularity_container_images | wc -l | xargs) + echo "Post-pipeline run container image count: $image_count" + echo "IMAGE_COUNT_AFTER=$image_count" >> "$GITHUB_OUTPUT" + + - name: Compare container image counts + id: count_comparison + run: | + if [ "${{ steps.count_initial.outputs.IMAGE_COUNT_INITIAL }}" -ne "${{ steps.count_afterwards.outputs.IMAGE_COUNT_AFTER }}" ]; then + initial_count=${{ steps.count_initial.outputs.IMAGE_COUNT_INITIAL }} + final_count=${{ steps.count_afterwards.outputs.IMAGE_COUNT_AFTER }} + difference=$((final_count - 
initial_count)) + echo "$difference additional container images were \n downloaded at runtime . The pipeline has no support for offline runs!" + tree ./singularity_container_images > ./container_afterwards + diff ./container_initial ./container_afterwards + exit 1 + else + echo "The pipeline can be downloaded successfully!" + fi + + - name: Upload Nextflow logfile for debugging purposes + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 + with: + name: nextflow_logfile.txt + path: .nextflow.log* + include-hidden-files: true diff --git a/.github/workflows/fix_linting.yml b/.github/workflows/fix_linting.yml new file mode 100644 index 0000000000..8957338b18 --- /dev/null +++ b/.github/workflows/fix_linting.yml @@ -0,0 +1,89 @@ +name: Fix linting from a comment +on: + issue_comment: + types: [created] + +jobs: + fix-linting: + # Only run if comment is on a PR with the main repo, and if it contains the magic keywords + if: > + contains(github.event.comment.html_url, '/pull/') && + contains(github.event.comment.body, '@nf-core-bot fix linting') && + github.repository == 'nf-core/sarek' + runs-on: ubuntu-latest + steps: + # Use the @nf-core-bot token to check out so we can push later + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 + with: + token: ${{ secrets.nf_core_bot_auth_token }} + + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + + # Action runs on the issue comment, so we don't get the PR by default + # Use the gh cli to check out the PR + - name: Checkout Pull Request + run: gh pr checkout ${{ github.event.issue.number }} + env: + GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} + + # Install and run pre-commit + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 + with: + python-version: "3.14" + + - 
name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true + + # indication that the linting has finished + - name: react if linting finished successfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" + + - name: Commit & push changes + id: commit-and-push + if: steps.pre-commit.outcome == 'failure' + run: | + git config user.email "core@nf-co.re" + git config user.name "nf-core-bot" + git config push.default upstream + git add . + git status + git commit -m "[automated] Fix code linting" + git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/nf-core/sarek/actions/runs/${{ github.run_id }}) for more details. 
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 3b448773c4..7a527a3464 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -1,133 +1,69 @@ name: nf-core linting # This workflow is triggered on pushes and PRs to the repository. -# It runs the `nf-core lint` and markdown lint tests to ensure that the code meets the nf-core guidelines +# It runs the `nf-core pipelines lint` and markdown lint tests to ensure +# that the code meets the nf-core guidelines. on: - push: pull_request: release: types: [published] jobs: - Markdown: + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-node@v1 - with: - node-version: '10' - - name: Install markdownlint - run: npm install -g markdownlint-cli - - name: Run Markdownlint - run: markdownlint . + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 + - name: Set up Python 3.14 + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - message: | - ## Markdown linting is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install `markdownlint-cli` - * On Mac: `brew install markdownlint-cli` - * Everything else: [Install `npm`](https://www.npmjs.com/get-npm) then [install `markdownlint-cli`](https://www.npmjs.com/package/markdownlint-cli) (`npm install -g markdownlint-cli`) - * Fix the markdown errors - * Automatically: `markdownlint . --fix` - * Manually resolve anything left from `markdownlint .` - - Once you push these changes the test should pass, and you can hide this comment :+1: - - We highly recommend setting up markdownlint in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! 
+ python-version: "3.14" - Thanks again for your contribution! - repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Install pre-commit + run: pip install pre-commit - EditorConfig: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - - uses: actions/setup-node@v1 - with: - node-version: '10' - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(git ls-files | grep -v test) - - YAML: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v1 - - uses: actions/setup-node@v1 - with: - node-version: '10' - - name: Install yaml-lint - run: npm install -g yaml-lint - - name: Run yaml-lint - run: yamllint $(find ${GITHUB_WORKSPACE} -type f -name "*.yml" -o -name "*.yaml") - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 - with: - message: | - ## YAML linting is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install `yaml-lint` - * [Install `npm`](https://www.npmjs.com/get-npm) then [install `yaml-lint`](https://www.npmjs.com/package/yaml-lint) (`npm install -g yaml-lint`) - * Fix the markdown errors - * Run the test locally: `yamllint $(find . -type f -name "*.yml" -o -name "*.yaml")` - * Fix any reported errors in your YAML files - - Once you push these changes the test should pass, and you can hide this comment :+1: - - We highly recommend setting up yaml-lint in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! - - Thanks again for your contribution! 
- repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Run pre-commit + run: pre-commit run --all-files nf-core: runs-on: ubuntu-latest steps: - - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@v1 + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: '3.6' - architecture: 'x64' + python-version: "3.14" + architecture: "x64" + + - name: read .nf-core.yml + uses: pietrobolcato/action-read-yaml@9f13718d61111b69f30ab4ac683e67a56d254e1d # 1.1.0 + id: read_yml + with: + config: ${{ github.workspace }}/.nf-core.yml - name: Install dependencies run: | python -m pip install --upgrade pip - pip install nf-core + pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} + + - name: Run nf-core pipelines lint + if: ${{ github.base_ref != 'master' }} + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - - name: Run nf-core lint + - name: Run nf-core pipelines lint --release + if: ${{ github.base_ref == 'master' }} env: GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} - run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + run: nf-core -l lint_log.txt pipelines lint --release --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - name: Save PR number if: ${{ always() }} @@ -135,11 +71,10 @@ jobs: - name: Upload linting log file artifact if: ${{ 
always() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: linting-logs path: | lint_log.txt lint_results.md PR_number.txt - diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 44d72994b0..e6e9bc269c 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -1,4 +1,3 @@ - name: nf-core linting comment # This workflow is triggered after the linting action is complete # It posts an automated comment to the PR, even if the PR is coming from a fork @@ -12,19 +11,18 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v2 + uses: dawidd6/action-download-artifact@ac66b43f0e6a346234dd65d4d0c8fbb31cb316e5 # v11 with: workflow: linting.yml workflow_conclusion: completed - name: Get PR number id: pr_number - run: echo "::set-output name=pr_number::$(cat linting-logs/PR_number.txt)" + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@773744901bac0e8cbb5a0dc842800d45e9b2b405 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} path: linting-logs/lint_results.md - diff --git a/.github/workflows/local_modules.yml b/.github/workflows/local_modules.yml deleted file mode 100644 index 838351536d..0000000000 --- a/.github/workflows/local_modules.yml +++ /dev/null @@ -1,100 +0,0 @@ -name: Local Modules pytest-workflow -on: [push, pull_request] - -jobs: - changes: - name: Check for changes - runs-on: ubuntu-latest - outputs: - # Expose matched filters as job 'modules' output variable - modules: ${{ steps.filter.outputs.changes }} - steps: - - uses: actions/checkout@v2 - - uses: dorny/paths-filter@v2 - id: filter - with: - filters: 'tests/config/pytest_software.yml' - - test: - 
runs-on: ubuntu-latest - name: ${{ matrix.tags }} ${{ matrix.profile }} ${{ matrix.nxf_version }} - needs: changes - if: needs.changes.outputs.modules != '[]' - strategy: - fail-fast: false - matrix: - nxf_version: ['21.04.0'] - tags: ['${{ fromJson(needs.changes.outputs.modules) }}'] - profile: ['docker', 'singularity'] ## 'conda' - env: - NXF_ANSI_LOG: false - steps: - - uses: actions/checkout@v2 - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.x' - - - uses: actions/cache@v2 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip- - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.x' - - - name: Install Python dependencies - run: python -m pip install --upgrade pip pytest-workflow - - - uses: actions/cache@v2 - with: - path: /usr/local/bin/nextflow - key: ${{ runner.os }}-nextflow-${{ matrix.nxf_version }} - restore-keys: | - ${{ runner.os }}-nextflow- - - - name: Install Nextflow - env: - NXF_VER: ${{ matrix.nxf_version }} - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - - name: Set up Singularity - if: matrix.profile == 'singularity' - uses: eWaterCycle/setup-singularity@v5 - with: - singularity-version: 3.7.1 - - - name: Setup miniconda - if: matrix.profile == 'conda' - uses: conda-incubator/setup-miniconda@v2 - with: - auto-update-conda: true - channels: conda-forge,bioconda,defaults - python-version: ${{ matrix.python-version }} - - - name: Conda clean - if: matrix.profile == 'conda' - run: conda clean -a - - # Test the module - - name: Run pytest-workflow - # only use one thread for pytest-workflow to avoid race condition on conda cache. 
- run: TMPDIR=~ PROFILE=${{ matrix.profile }} pytest --tag ${{ matrix.tags }} --symlink --kwdof - - - name: Upload logs on failure - if: failure() - uses: actions/upload-artifact@v2 - with: - name: logs-${{ matrix.tags }}-${{ matrix.profile }}-${{ matrix.nxf_version }} - path: | - /home/runner/pytest_workflow_*/*/.nextflow.log - /home/runner/pytest_workflow_*/*/log.out - /home/runner/pytest_workflow_*/*/log.err diff --git a/.github/workflows/ncbench.yml b/.github/workflows/ncbench.yml new file mode 100644 index 0000000000..efa9682b4e --- /dev/null +++ b/.github/workflows/ncbench.yml @@ -0,0 +1,119 @@ +name: NCBench Zenodo Upload +# This workflow can be triggered manually with the GitHub actions workflow dispatch button. + +on: + # upload can only be triggered manually for now + workflow_dispatch: + inputs: + germline_default_agilent: + description: "Trigger NCBench upload" + type: boolean + default: true + +jobs: + ncbench-upload: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + include: + # TODO: Add in other test profiles + - profile: germline_default_agilent + enabled: ${{ ( github.repository == 'nf-core/sarek' ) && ( github.event_name != 'workflow_dispatch' || inputs.germline_default_agilent ) }} + steps: + - name: Download pipeline + uses: actions/checkout@v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + + - name: Extract Versions + id: extract_versions + run: | + PIPELINE_VERSION=$(awk -F"'" '/manifest {/,/version/ {if(/version/) print $2}' nextflow.config) + echo "PIPELINE_VERSION=${PIPELINE_VERSION}" >> $GITHUB_ENV + echo "PIPELINE_VERSION_NO_DOTS=$(echo $PIPELINE_VERSION | tr -d '.')" >> $GITHUB_ENV + + nextflow inspect -profile test_full_germline_ncbench_agilent . 
--outdir ./inspect >> inspect.json + + FASTP_VERSION=$(jq -r '.processes[] | select(.name == "NFCORE_SAREK:SAREK:FASTP") | .container' inspect.json | awk -F':' '{print $NF}' | cut -d "-" -f 1) + echo "FASTP_VERSION=${FASTP_VERSION}" >> $GITHUB_ENV + + BWA_VERSION=$(jq -r '.processes[] | select(.name == "NFCORE_SAREK:SAREK:PREPARE_GENOME:BWAMEM1_INDEX") | .container' inspect.json | awk -F':' '{print $NF}' | cut -d "-" -f 1) + echo "BWA_VERSION=${BWA_VERSION}" >> $GITHUB_ENV + + BQSR_VERSION=$(jq -r '.processes[] | select(.name == "NFCORE_SAREK:SAREK:BAM_BASERECALIBRATOR:GATK4_BASERECALIBRATOR") | .container' inspect.json | awk -F':' '{print $NF}' | cut -d "-" -f 1) + echo "BQSR_VERSION=${BQSR_VERSION}" >> $GITHUB_ENV + + DEEPVARIANT_VERSION=$(jq -r '.processes[] | select(.name == "NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_DEEPVARIANT:DEEPVARIANT") | .container' inspect.json | awk -F':' '{print $NF}' | cut -d "-" -f 1) + echo "DEEPVARIANT_VERSION=${DEEPVARIANT_VERSION}" >> $GITHUB_ENV + + FREEBAYES_VERSION=$(jq -r '.processes[] | select(.name == "NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_FREEBAYES:FREEBAYES") | .container' inspect.json | awk -F':' '{print $NF}' | cut -d "-" -f 1) + echo "FREEBAYES_VERSION=${FREEBAYES_VERSION}" >> $GITHUB_ENV + + HAPLOTYPECALLER_VERSION=$(jq -r '.processes[] | select(.name == "NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_HAPLOTYPECALLER:GATK4_HAPLOTYPECALLER") | .container' inspect.json | awk -F':' '{print $NF}' | cut -d "-" -f 1) + echo "HAPLOTYPECALLER_VERSION=${HAPLOTYPECALLER_VERSION}" >> $GITHUB_ENV + + STRELKA_VERSION=$(jq -r '.processes[] | select(.name == "NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_SINGLE_STRELKA:STRELKA_SINGLE") | .container' inspect.json | awk -F':' '{print $NF}' | cut -d "-" -f 1) + echo "STRELKA_VERSION=${STRELKA_VERSION}" >> $GITHUB_ENV + + - name: Download files from AWS + uses: 
keithweaver/aws-s3-github-action@v1.0.0 + if: ${{ matrix.enabled }} + with: + source: "s3://${{ secrets.AWS_S3_BUCKET }}/sarek/results-${{ github.sha }}/test_full_germline_ncbench_agilent/variant_calling" + command: sync + aws_region: eu-west-1 + destination: ./variant_calling + flags: --no-sign-request --include ".vcf.gz" --exclude "g.vcf.gz" + + - name: Install requests + run: pip install requests + + - name: Upload to zenodo + id: upload_zenodo + env: + # TODO ATTENTION: Use SANDBOX TOKEN during development: ${{ secrets.NCBENCH_CI_TOKEN_SANDBOX }} + ACCESS_TOKEN: ${{ secrets.ZENODO_DEPOSIT }} + PIPELINE_VERSION: ${{ env.PIPELINE_VERSION }} + run: python .github/workflows/upload.py + + - name: Store Zenodo deposition ID + run: | + echo "DEPOSITION_ID=$(cat deposition_id.txt)" >> $GITHUB_ENV + + # update ncbench config yml + - name: NCBench set config + uses: actions/checkout@v4 + with: + token: ${{ secrets.NCBENCH_CI_TOKEN }} + repository: FriederikeHanssen/ncbench-workflow + path: ncbench-workflow/ + ref: main + + - name: Install yq + uses: mikefarah/yq@master + + - name: Add entry to config + env: + PIPELINE_VERSION_NO_DOTS: ${{ env.PIPELINE_VERSION_NO_DOTS}} + PIPELINE_VERSION: ${{ env.PIPELINE_VERSION }} + FASTP_VERSION: ${{ env.FASTP_VERSION }} + BWA_VERSION: ${{ env.BWA_VERSION }} + BQSR_VERSION: ${{ env.BQSR_VERSION }} + DEEPVARIANT_VERSION: ${{ env.DEEPVARIANT_VERSION }} + FREEBAYES_VERSION: ${{ env.FREEBAYES_VERSION }} + HAPLOTYPECALLER_VERSION: ${{ env.HAPLOTYPECALLER_VERSION }} + STRELKA_VERSION: ${{ env.STRELKA_VERSION }} + DEPOSITION_ID: ${{ env.DEPOSITION_ID }} + run: bash .github/workflows/set_ncbench_config.sh + + - name: Commit files + uses: EndBug/add-and-commit@v9 + with: + cwd: "./ncbench-workflow" + author_name: "FriederikeHanssen" + author_email: "FriederikeHanssen@users.noreply.github.com" + new_branch: "sarek_${{ env.PIPELINE_VERSION }}" + message: "Add changes" + add: "config/config.yaml" diff --git a/.github/workflows/nf-test-gpu.yml 
b/.github/workflows/nf-test-gpu.yml new file mode 100644 index 0000000000..6d792274a8 --- /dev/null +++ b/.github/workflows/nf-test-gpu.yml @@ -0,0 +1,154 @@ +name: Run nf-test on GPU +on: + pull_request: + paths-ignore: + - "docs/**" + - "**/meta.yml" + - "**/*.md" + - "**/*.png" + - "**/*.svg" + release: + types: [published] + workflow_dispatch: + +# Cancel if a newer run is started +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NFT_VER: "0.9.3" + # NFT_WORKDIR: "~" + NXF_ANSI_LOG: false + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity +jobs: + nf-test-gpu-changes: + name: nf-test-gpu-changes + runs-on: # use self-hosted runners + - runs-on=${{ github.run_id }}-nf-test-gpu-changes + - runner=4cpu-linux-x64 + outputs: + shard: ${{ steps.set-shards.outputs.shard }} + total_shards: ${{ steps.set-shards.outputs.total_shards }} + steps: + - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner + run: | + ls -la ./ + rm -rf ./* || true + rm -rf ./.??* || true + ls -la ./ + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 + with: + fetch-depth: 0 + + - name: get number of shards + id: set-shards + uses: ./.github/actions/get-shards + env: + NFT_VER: ${{ env.NFT_VER }} + with: + tags: "gpu" + max_shards: 2 + + - name: debug + run: | + echo ${{ steps.set-shards.outputs.shard }} + echo ${{ steps.set-shards.outputs.total_shards }} + + nf-test-gpu: + name: "GPU | ${{ matrix.profile }} | ${{ matrix.NXF_VER }} | ${{ matrix.shard }}/${{ needs.nf-test-gpu-changes.outputs.total_shards }}" + needs: [nf-test-gpu-changes] + if: ${{ needs.nf-test-gpu-changes.outputs.total_shards != '0' }} + runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64" + strategy: + fail-fast: false + matrix: + 
shard: ${{ fromJson(needs.nf-test-gpu-changes.outputs.shard) }} + # Note: at the moment, in sarek no processes using gpu are supported with conda + # Which is why the profile matrix is limited to docker and singularity + profile: [docker, singularity] + isMain: + - ${{ github.base_ref == 'master' || github.base_ref == 'main' }} + # Exclude conda and singularity on dev + exclude: + - isMain: false + profile: "conda" + - isMain: false + profile: "singularity" + NXF_VER: + - 25.10.2 + - latest-everything + env: + NXF_ANSI_LOG: false + TOTAL_SHARDS: ${{ needs.nf-test-gpu-changes.outputs.total_shards }} + + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 + with: + fetch-depth: 0 + + - name: Test CUDA + run: | + nvidia-smi -L + + - name: Set NFT_WORKDIR dynamically + run: echo "NFT_WORKDIR=$HOME" >> $GITHUB_ENV + + - name: Run nf-test on GPU + id: run_nf_test_gpu + uses: ./.github/actions/nf-test + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} + env: + NFT_WORKDIR: ${{ env.NFT_WORKDIR }} + NXF_VERSION: ${{ matrix.NXF_VER }} + SENTIEON_AUTH_MECH: "GitHub Actions - token" + SENTIEON_ENCRYPTION_KEY: ${{ secrets.SENTIEON_ENCRYPTION_KEY }} + SENTIEON_LICENSE_MESSAGE: ${{ secrets.SENTIEON_LICENSE_MESSAGE }} + SENTIEON_LICSRVR_IP: ${{ secrets.SENTIEON_LICSRVR_IP }} + with: + profile: ${{ matrix.profile }} + shard: ${{ matrix.shard }} + total_shards: ${{ env.TOTAL_SHARDS }} + tags: ${{ matrix.profile == 'conda' && 'gpu_conda' || 'gpu,gpu_conda' }} + - name: Report test status + if: ${{ always() }} + run: | + if [[ "${{ steps.run_nf_test_gpu.outcome }}" == "failure" ]]; then + echo "::error::Test with ${{ matrix.NXF_VER }} failed" + # Add to workflow summary + echo "## ❌ Test failed: ${{ matrix.profile }} | ${{ matrix.NXF_VER }} | Shard ${{ matrix.shard }}/${{ env.TOTAL_SHARDS }}" >> $GITHUB_STEP_SUMMARY + if [[ "${{ matrix.NXF_VER }}" == "latest-everything" ]]; then + echo "::warning::Test with latest-everything failed but will 
not cause workflow failure. Please check if the error is expected or if it needs fixing." + fi + if [[ "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then + exit 1 + fi + fi + confirm-pass: + needs: [nf-test-gpu] + if: always() + runs-on: # use self-hosted runners + - runs-on=${{ github.run_id }}-confirm-pass + - runner=2cpu-linux-x64 + steps: + - name: One or more tests failed (excluding latest-everything) + if: ${{ contains(needs.*.result, 'failure') }} + run: exit 1 + + - name: One or more tests cancelled + if: ${{ contains(needs.*.result, 'cancelled') }} + run: exit 1 + + - name: All tests ok + if: ${{ contains(needs.*.result, 'success') }} + run: exit 0 + + - name: debug-print # echoes the `needs` context; backticks below are escaped so bash does not run `needs` as a command + if: always() + run: | + echo "::group::DEBUG: \`needs\` Contents" + echo "DEBUG: toJSON(needs) = ${{ toJSON(needs) }}" + echo "DEBUG: toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" + echo "::endgroup::" diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml new file mode 100644 index 0000000000..38d8a3fbdd --- /dev/null +++ b/.github/workflows/nf-test.yml @@ -0,0 +1,151 @@ +name: Run nf-test +on: + pull_request: + paths-ignore: + - "docs/**" + - "**/meta.yml" + - "**/*.md" + - "**/*.png" + - "**/*.svg" + release: + types: [published] + workflow_dispatch: + +# Cancel if a newer run is started +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NFT_VER: "0.9.3" + # NFT_WORKDIR: "~" + NXF_ANSI_LOG: false + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity +jobs: + nf-test-changes: + name: nf-test-changes + runs-on: # use self-hosted runners + - runs-on=${{ github.run_id }}-nf-test-changes + - runner=4cpu-linux-x64 + outputs: + shard: ${{ steps.set-shards.outputs.shard }} + total_shards: ${{ steps.set-shards.outputs.total_shards }} + steps: + - name: 
Clean Workspace # Purge the workspace in case it's running on a self-hosted runner + run: | + ls -la ./ + rm -rf ./* || true + rm -rf ./.??* || true + ls -la ./ + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 + with: + fetch-depth: 0 + + - name: get number of shards + id: set-shards + uses: ./.github/actions/get-shards + env: + NFT_VER: ${{ env.NFT_VER }} + with: + tags: "cpu,cpu_conda" + max_shards: 15 + + - name: debug + run: | + echo ${{ steps.set-shards.outputs.shard }} + echo ${{ steps.set-shards.outputs.total_shards }} + + nf-test: + name: "${{ matrix.profile }} | ${{ matrix.NXF_VER }} | ${{ matrix.shard }}/${{ needs.nf-test-changes.outputs.total_shards }}" + needs: [nf-test-changes] + if: ${{ needs.nf-test-changes.outputs.total_shards != '0' }} + runs-on: # use self-hosted runners + - runs-on=${{ github.run_id }}-nf-test + - runner=4cpu-linux-x64 + - volume=80gb + strategy: + fail-fast: false + matrix: + shard: ${{ fromJson(needs.nf-test-changes.outputs.shard) }} + profile: [conda, docker, singularity] + isMain: + - ${{ github.base_ref == 'master' || github.base_ref == 'main' }} + # Exclude conda and singularity on dev + exclude: + - isMain: false + profile: "conda" + - isMain: false + profile: "singularity" + NXF_VER: + - 25.10.2 + - latest-everything + env: + NXF_ANSI_LOG: false + TOTAL_SHARDS: ${{ needs.nf-test-changes.outputs.total_shards }} + + steps: + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 + with: + fetch-depth: 0 + + - name: Set NFT_WORKDIR dynamically + run: echo "NFT_WORKDIR=$HOME" >> $GITHUB_ENV + + - name: Run nf-test + id: run_nf_test + uses: ./.github/actions/nf-test + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} + env: + NFT_WORKDIR: ${{ env.NFT_WORKDIR }} + NXF_VERSION: ${{ matrix.NXF_VER }} + SENTIEON_AUTH_MECH: "GitHub Actions - token" + SENTIEON_ENCRYPTION_KEY: ${{ secrets.SENTIEON_ENCRYPTION_KEY }} + SENTIEON_LICENSE_MESSAGE: ${{ secrets.SENTIEON_LICENSE_MESSAGE 
}} + SENTIEON_LICSRVR_IP: ${{ secrets.SENTIEON_LICSRVR_IP }} + with: + profile: ${{ matrix.profile }} + shard: ${{ matrix.shard }} + total_shards: ${{ env.TOTAL_SHARDS }} + tags: ${{ matrix.profile == 'conda' && 'cpu_conda' || 'cpu,cpu_conda' }} + - name: Report test status + if: ${{ always() }} + run: | + if [[ "${{ steps.run_nf_test.outcome }}" == "failure" ]]; then + echo "::error::Test with ${{ matrix.NXF_VER }} failed" + # Add to workflow summary + echo "## ❌ Test failed: ${{ matrix.profile }} | ${{ matrix.NXF_VER }} | Shard ${{ matrix.shard }}/${{ env.TOTAL_SHARDS }}" >> $GITHUB_STEP_SUMMARY + if [[ "${{ matrix.NXF_VER }}" == "latest-everything" ]]; then + echo "::warning::Test with latest-everything failed but will not cause workflow failure. Please check if the error is expected or if it needs fixing." + fi + if [[ "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then + exit 1 + fi + fi + confirm-pass: + needs: [nf-test] + if: always() + runs-on: # use self-hosted runners + - runs-on=${{ github.run_id }}-confirm-pass + - runner=2cpu-linux-x64 + steps: + - name: One or more tests failed (excluding latest-everything) + if: ${{ contains(needs.*.result, 'failure') }} + run: exit 1 + + - name: One or more tests cancelled + if: ${{ contains(needs.*.result, 'cancelled') }} + run: exit 1 + + - name: All tests ok + if: ${{ contains(needs.*.result, 'success') }} + run: exit 0 + + - name: debug-print # echoes the `needs` context; backticks below are escaped so bash does not run `needs` as a command + if: always() + run: | + echo "::group::DEBUG: \`needs\` Contents" + echo "DEBUG: toJSON(needs) = ${{ toJSON(needs) }}" + echo "DEBUG: toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" + echo "::endgroup::" diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml new file mode 100644 index 0000000000..431d3d4457 --- /dev/null +++ b/.github/workflows/release-announcements.yml @@ -0,0 +1,46 @@ +name: release-announcements +# Automatic release toot and tweet announcements +on: + release: + types: [published] + 
workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - name: get topics and convert to hashtags + id: get_topics + run: | + echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" | sed 's/-//g' >> $GITHUB_OUTPUT + + - name: get description + id: get_description + run: | + echo "description=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .description')" >> $GITHUB_OUTPUT + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + ${{ steps.get_description.outputs.description }} + Please see the changelog: ${{ github.event.release.html_url }} + + ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@6461056ea355ea43b977e149f7bf76aaa572e5e8 # v0.3.0 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.github/workflows/set_ncbench_config.sh b/.github/workflows/set_ncbench_config.sh new file mode 100644 index 0000000000..887c253bf9 --- /dev/null +++ b/.github/workflows/set_ncbench_config.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +# This script updates the config.yml in the NCBench repository. 
+# The config file is needed to trigger the subsequent benchmarking workflow. +# For each variant caller and each input file a new entry needs to be created. + +declare -A variant_callers=( + ["deepvariant"]="NA12878_%sM.deepvariant.vcf.gz" + ["freebayes"]="NA12878_%sM.freebayes.vcf.gz" + ["haplotypecaller"]="NA12878_%sM.haplotypecaller.filtered.vcf.gz" + ["strelka2"]="NA12878_%sM.strelka.variants.vcf.gz" +) + +declare -A variant_versions=( + ["deepvariant"]="${DEEPVARIANT_VERSION}" + ["freebayes"]="${FREEBAYES_VERSION}" + ["haplotypecaller"]="${HAPLOTYPECALLER_VERSION}" + ["strelka2"]="${STRELKA_VERSION}" +) + +for READS in 75 200; do + for variant_caller in "${!variant_callers[@]}"; do + filename=$(printf "${variant_callers[$variant_caller]}" "$READS") # each template has exactly one %s; a second argument makes printf repeat the whole template + yq --inplace ' + with(.variant-calls.nf-core-sarek-'"${PIPELINE_VERSION_NO_DOTS}"'-'"${variant_caller}"'-agilent-'"${READS}"'M.labels; + .site = "nf-core" | + .pipeline = "nf-core/sarek v'"${PIPELINE_VERSION}"'" | + .trimming = "FastP v'"${FASTP_VERSION}"'" | + .read-mapping = "bwa mem v'"${BWA_VERSION}"'" | + .base-quality-recalibration = "gatk4 v'"${BQSR_VERSION}"'" | + .realignment = "none" | + .variant-detection = "'${variant_caller}' v'"${variant_versions[$variant_caller]}"'" | + .genotyping = "none" | + .reads = "'"${READS}"'M" ) | + with(.variant-calls.nf-core-sarek-'"${PIPELINE_VERSION_NO_DOTS}"'-'"${variant_caller}"'-agilent-'"${READS}"'M.subcategory; + . = "NA12878-agilent" ) | + with(.variant-calls.nf-core-sarek-'"${PIPELINE_VERSION_NO_DOTS}"'-'"${variant_caller}"'-agilent-'"${READS}"'M.zenodo; + .deposition = '"${DEPOSITION_ID}"' | + .filename= "'"${filename}"'" ) | + with(.variant-calls.nf-core-sarek-'"${PIPELINE_VERSION_NO_DOTS}"'-'"${variant_caller}"'-agilent-'"${READS}"'M.benchmark; + . = "giab-NA12878-agilent-'"${READS}"'M" ) | + with(.variant-calls.nf-core-sarek-'"${PIPELINE_VERSION_NO_DOTS}"'-'"${variant_caller}"'-agilent-'"${READS}"'M.rename-contigs; + . 
= "resources/rename-contigs/ucsc-to-ensembl.txt" ) + ' ncbench-workflow/config/config.yaml + done +done diff --git a/.github/workflows/template-version-comment.yml b/.github/workflows/template-version-comment.yml new file mode 100644 index 0000000000..e8560fc7c9 --- /dev/null +++ b/.github/workflows/template-version-comment.yml @@ -0,0 +1,49 @@ +name: nf-core template version comment +# This workflow is triggered on PRs to check if the pipeline template version matches the latest nf-core version. +# It posts a comment to the PR, even if it comes from a fork. + +on: pull_request_target + +jobs: + template_version: + runs-on: ubuntu-latest + steps: + - name: Check out pipeline code + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: Read template version from .nf-core.yml + uses: nichmor/minimal-read-yaml@1f7205277e25e156e1f63815781db80a6d490b8f # v0.0.2 + id: read_yml + with: + config: ${{ github.workspace }}/.nf-core.yml + + - name: Install nf-core + env: + # The version string is read from the PR's own .nf-core.yml; hand it over as an environment variable so it is never spliced into the script text + NF_CORE_VERSION: ${{ steps.read_yml.outputs['nf_core_version'] }} + run: | + python -m pip install --upgrade pip + pip install "nf-core==${NF_CORE_VERSION}" + + - name: Check nf-core outdated + id: nf_core_outdated + run: echo "OUTPUT=$(pip list --outdated | grep nf-core)" >> ${GITHUB_ENV} + + - name: Post nf-core template version comment + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 + if: | + contains(env.OUTPUT, 'nf-core') + with: + repo-token: ${{ secrets.NF_CORE_BOT_AUTH_TOKEN }} + allow-repeats: false + message: | + > [!WARNING] + > Newer version of the nf-core template is available. + > + > Your pipeline is using an old version of the nf-core template: ${{ steps.read_yml.outputs['nf_core_version'] }}. + > Please update your pipeline to the latest version. 
+ > + > For more documentation on how to update your pipeline, please see the [nf-core documentation](https://github.com/nf-core/tools?tab=readme-ov-file#sync-a-pipeline-with-the-template) and [Synchronisation documentation](https://nf-co.re/docs/contributing/sync). + # diff --git a/.github/workflows/upload.py b/.github/workflows/upload.py new file mode 100644 index 0000000000..fcd7eeb1e9 --- /dev/null +++ b/.github/workflows/upload.py @@ -0,0 +1,101 @@ +import logging +import requests +import os +import json + +""" +This scripts collects all variant calling files and uploads them to Zenodo. +1. A new Zenodo entry is created +2. All files are uploaded +3. Meta data is added: Pipeline version, authors +4. Entry is published. + +ATTENTION: Use sandbox links during development! They are set in each affected line as comment. + If you need to use the production Zenodo links, turn off publishing (see bottom). +""" + +headers = {"Content-Type": "application/json"} +access_token = os.environ["ACCESS_TOKEN"] +params = {"access_token": access_token} +workspace_directory = os.environ["GITHUB_WORKSPACE"] +pipeline_version = os.environ["PIPELINE_VERSION"] + +# TODO: replace sandbox link f"https://sandbox.zenodo.org/api/deposit/depositions" +url = f"https://zenodo.org/api/deposit/depositions" + +# Create empty upload +try: + r = requests.post(url, params=params, json={}, headers=headers) + r.raise_for_status() +except requests.exceptions.RequestException as e: + raise SystemExit(e) + +logging.info("Create empty upload:\n") +logging.info(r.json()) +logging.info(r.status_code) + +deposition_id = r.json()["id"] + +## Store deposition ID +with open("deposition_id.txt", "w") as f: + f.write(str(deposition_id)) + +# Upload a new file +bucket_url = r.json()["links"]["bucket"] + +filenames = [ + "deepvariant/NA12878_75M/NA12878_75M.deepvariant.vcf.gz", + "freebayes/NA12878_75M/NA12878_75M.freebayes.vcf.gz", + "haplotypecaller/NA12878_75M/NA12878_75M.haplotypecaller.filtered.vcf.gz", + 
"strelka/NA12878_75M/NA12878_75M.strelka.variants.vcf.gz", + "deepvariant/NA12878_200M/NA12878_200M.deepvariant.vcf.gz", + "freebayes/NA12878_200M/NA12878_200M.freebayes.vcf.gz", + "haplotypecaller/NA12878_200M/NA12878_200M.haplotypecaller.filtered.vcf.gz", + "strelka/NA12878_200M/NA12878_200M.strelka.variants.vcf.gz", +] + +for file in filenames: + path = "./variant_calling/%s" % file + with open(path, "rb") as fp: + r = requests.put( + "%s/%s" % (bucket_url, os.path.basename(file)), + data=fp, + params=params, + ) + logging.info(r.json()) + +# Add metadata to uploaded file +title = "WES benchmark results nf-core/sarek v{}".format(pipeline_version) +data = { + "metadata": { + "title": title, + "upload_type": "dataset", + "description": "Variant calling results on benchmarking datasets produced with nf-core/sarek", + "creators": [ + {"name": "Garcia, Maxime Ulysse", "affiliation": "Seqera, Barcelona"}, + {"name": "Hanssen, Friederike", "affiliation": "Quantitative Biology Center, Tuebingen"}, + ], + } +} + +# TODO replace sandbox link https://sandbox.zenodo.org/api/deposit/depositions/ https://zenodo.org/api/deposit/depositions/ +r = requests.put( + "https://zenodo.org/api/deposit/depositions/%s" % deposition_id, + params=params, + data=json.dumps(data), + headers=headers, +) + +logging.info("Add metadata: ") +logging.info(r.status_code) +logging.info(r.json()) + +# TODO only uncomment once everything works, replace sandbox link: "https://sandbox.zenodo.org/api/deposit/depositions/%s/actions/publish" +# Publish this +try: + r = requests.post("https://zenodo.org/api/deposit/depositions/%s/actions/publish" % deposition_id, params=params) + r.raise_for_status() +except requests.exceptions.RequestException as e: + raise SystemExit(e) +logging.info("Publish data status code: ") +logging.info(r.status_code) diff --git a/.gitignore b/.gitignore index 09ece0f765..e3aaef5619 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -*.code-workspace .nextflow* work/ data/ @@ 
-7,3 +6,13 @@ results/ testing/ testing* *.pyc +null/ +*.code-workspace +.nf-test* +.nf-test/ +test-datasets/ +test.tap +test.xml +.vscode/mcp.json +modules/nf-core/**/tests/ +subworkflows/nf-core/**/tests/ diff --git a/.gitpod.yml b/.gitpod.yml deleted file mode 100644 index 289c86e52f..0000000000 --- a/.gitpod.yml +++ /dev/null @@ -1,14 +0,0 @@ -image: nfcore/gitpod:latest - -vscode: - extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - - davidanson.vscode-markdownlint # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting - - oderwat.indent-rainbow # Highlight indentation level - - streetsidesoftware.code-spell-checker # Spelling checker for source code diff --git a/.markdownlint.yml b/.markdownlint.yml deleted file mode 100644 index 9e605fcfab..0000000000 --- a/.markdownlint.yml +++ /dev/null @@ -1,14 +0,0 @@ -# Markdownlint configuration file -default: true -line-length: false -ul-indent: - indent: 4 -no-duplicate-header: - siblings_only: true -no-inline-html: - allowed_elements: - - img - - p - - kbd - - details - - summary diff --git a/.nf-core.yml b/.nf-core.yml index b994ae2cce..ec357e372d 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,8 +1,26 @@ lint: + files_exist: + - .github/workflows/awsfulltest.yml + - .github/workflows/awstest.yml + - .github/workflows/ci.yml + - conf/modules.config files_unchanged: - - .github/workflows/linting.yml - - assets/multiqc_config.yaml + - .gitignore - 
assets/nf-core-sarek_logo_light.png - docs/images/nf-core-sarek_logo_dark.png - docs/images/nf-core-sarek_logo_light.png - - lib/NfcoreTemplate.groovy + modules_config: false + nf_test_content: false + schema_params: false + template_strings: false +nf_core_version: 3.5.1 +repository_type: pipeline +template: + author: Maxime Garcia, Szilveszter Juhos, Friederike Hanssen + description: An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing + force: false + is_nfcore: true + name: sarek + org: nf-core + outdir: . + version: 3.9.0dev diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000..d06777a8f7 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,27 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.1.0" + hooks: + - id: prettier + additional_dependencies: + - prettier@3.6.2 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + exclude: | + (?x)^( + .*ro-crate-metadata.json$| + modules/nf-core/.*| + subworkflows/nf-core/.*| + .*\.snap$ + )$ + - id: end-of-file-fixer + exclude: | + (?x)^( + .*ro-crate-metadata.json$| + modules/nf-core/.*| + subworkflows/nf-core/.*| + .*\.snap$ + )$ diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 0000000000..63cde500ee --- /dev/null +++ b/.prettierignore @@ -0,0 +1,14 @@ +email_template.html +.nextflow* +work/ +data/ +results/ +.DS_Store +testing/ +testing* +*.pyc +bin/ +.nf-test/ +ro-crate-metadata.json +modules/nf-core/ +subworkflows/nf-core/ diff --git a/.prettierrc.yml b/.prettierrc.yml new file mode 100644 index 0000000000..07dbd8bb99 --- /dev/null +++ b/.prettierrc.yml @@ -0,0 +1,6 @@ +printWidth: 120 +tabWidth: 4 +overrides: + - files: "*.{md,yml,yaml,html,css,scss,js,cff}" + options: + tabWidth: 2 diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 
100644 index 0000000000..a33b527cc7 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "markdown.styles": ["public/vscode_markdown.css"] +} diff --git a/CHANGELOG.md b/CHANGELOG.md index 3df16de66d..cf940efe3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,14 +5,1490 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [dev](https://github.com/nf-core/sarek/tree/dev) +## dev ### Added -- [##448](https://github.com/nf-core/sarek/pull/448) - Allow to skip base quality recalibration with `--skip_bqsr` +- [#2087](https://github.com/nf-core/sarek/pull/2087) - Add `bam` as output format for parabricks/fq2bam, add multi lane support ### Changed +- [#2055](https://github.com/nf-core/sarek/pull/2055) - Sort final vcf in varlociraptor sbwfs and update varlociraptor +- [#2141](https://github.com/nf-core/sarek/pull/2141) - Update snpeff + +### Fixed + +- [#2117](https://github.com/nf-core/sarek/pull/2117) - Silent failure with multi-lane samples +- [#2143](https://github.com/nf-core/sarek/pull/2143) - Varlociraptor collecting multiple scenario files for one sample +- [#2146](https://github.com/nf-core/sarek/pull/2146) - Fail early when `--no_intervals` is used with joint germline HaplotypeCaller +- [#2147](https://github.com/nf-core/sarek/pull/2147) - Fix empty fastp output folder created when trimmed reads are not saved +- [#2152](https://github.com/nf-core/sarek/pull/2152) - Fix missing `params.` prefix for `umi_tag` in markduplicates config + +### Removed + +### Dependencies + +| Dependency | Old version | New version | +| ------------- | ----------- | ----------- | +| snpeff | 5.3a | 5.4a | +| varlociraptor | 8.7.4 | 8.9.3 | +| yte | 1.9.0 | 1.9.4 | + +### Parameters + +| Params | status | +| ------ | ------ | + +### Developer section + +#### Added + +#### Changed + +- 
[#2142](https://github.com/nf-core/sarek/pull/2142) - Replace custom Slack/Teams notifications with nf-slack plugin (v0.5.0) bot token auth, scoped entirely to CI cloud test workflow, Remove Azure cloud test profiles, use dynamic matrix for selective test dispatch, Fix cloud test to use secrets instead of vars for TOWER_BUCKET_AWS, TOWER_COMPUTE_ENV, and TOWER_WORKSPACE_ID +- [#2055](https://github.com/nf-core/sarek/pull/2055) - Update varlociraptor to use only one input channel, swap to topics +- [#2087](https://github.com/nf-core/sarek/pull/2087) - Move parabricks config into its own, adhere to strict syntax, swap to topics +- [#2139](https://github.com/nf-core/sarek/pull/2139) - Back to dev (3.9.0dev) +- [#2141](https://github.com/nf-core/sarek/pull/2141) - Update vcf_annotate_snpeff subworkflow, swap tabix/bgziptabix and snpeff to topics, strict syntax +- [#2159](https://github.com/nf-core/sarek/pull/2159) - Fix strict syntax errors + +#### Fixed + +- [#2117](https://github.com/nf-core/sarek/pull/2117) - Update alignment related files to strict syntax +- [#2129](https://github.com/nf-core/sarek/pull/2129) - Fix MuSE timestamp, swap to topics and change to strict syntax +- [#2165](https://github.com/nf-core/sarek/pull/2165) - Recover help message +- [#2167](https://github.com/nf-core/sarek/pull/2167) - Fix and extend pipeline level stub tests + +#### Removed + +## [3.8.1](https://github.com/nf-core/sarek/releases/tag/3.8.1) - Laitaure + +### Added + +### Changed + +### Fixed + +- [#2128](https://github.com/nf-core/sarek/pull/2128) - Fix `bcftools concat` failing on Strelka somatic VCFs with non-contiguous chromosome blocks by adding `--allow-overlaps` + +### Removed + +### Dependencies + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | + +### Parameters + +| Params | status | +| ------ | ------ | + +### Developer section + +#### Added + +#### Changed + +- [#2134](https://github.com/nf-core/sarek/pull/2134) - Back to dev 
(3.9.0dev) + +#### Fixed + +#### Removed + +## [3.8.0](https://github.com/nf-core/sarek/releases/tag/3.8.0) - Sitojaure + +A mountain cabin near the Sami settlement between Saltoluokta and Kvikkjokk. + +### Added + +- [#2092](https://github.com/nf-core/sarek/pull/2092) - Add support for VEP Condel plugin to calculate Consensus Deleteriousness scores for missense mutations +- [#2093](https://github.com/nf-core/sarek/pull/2093) - Add support for VEP Mastermind plugin to retrieve citation counts from Mastermind Genomic Search Engine +- [#2094](https://github.com/nf-core/sarek/pull/2094) - Add support for VEP Phenotypes plugin to retrieve overlapping phenotype information from Ensembl databases +- [#2103](https://github.com/nf-core/sarek/pull/2103) - Documentation update about new VEP plugins +- [#2111](https://github.com/nf-core/sarek/pull/2111) - Add SnpSift annotation support using `--tools snpsift` with `--snpsift_databases` CSV configuration + +### Changed + +- [#2119](https://github.com/nf-core/sarek/pull/2119) - Update VEP version from 111.0-0 to 115.0-0 and VEP cache version from 114 to 115; update ensemblvep module to include `perl-math-cdf` dependency for Condel plugin + +### Fixed + +- [#2077](https://github.com/nf-core/sarek/pull/2077) - Remove re-indexed bam from `indexcov` from publishing into top level `outdir` directory +- [#2083](https://github.com/nf-core/sarek/pull/2083) - Remove `exists` validation from `snpeff_cache` and `vep_cache` parameters to fix workflow launch failures when annotation tools are not used +- [#2095](https://github.com/nf-core/sarek/pull/2095) - Fix typo in consensus calling parameter reference (used non-existent `params.consensus_vcfs` instead of `params.snv_consensus_calling`) +- [#2096](https://github.com/nf-core/sarek/pull/2096) - Fix consensus calling to include missing variant callers (`bcftools`, `lofreq`) and correct Sentieon tool names ([#2088](https://github.com/nf-core/sarek/issues/2088)) +- 
[#2099](https://github.com/nf-core/sarek/pull/2099) - Remove deprecated `msisensor2_scan` parameter from schema and igenomes config to fix `nf-core pipelines schema build` validation error +- [#2100](https://github.com/nf-core/sarek/pull/2100) - Add missing citations for Condel, Mastermind, goleft indexcov, NGSCheckMate, SPRING, and vcflib to CITATIONS.md +- [#2109](https://github.com/nf-core/sarek/pull/2109) - Fix consensus calling to capture all variants from all callers by using `sites.txt` output; adds `CALLERS` and `NCALLERS` INFO fields to consensus VCF +- [#2113](https://github.com/nf-core/sarek/pull/2113) - Fix regex patterns for `dbnsfp_tbi`, `spliceai_snv`, and `spliceai_snv_tbi` parameter validation + +### Removed + +### Dependencies + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | + +### Parameters + +| Params | status | +| ---------------------------- | ------- | +| `--vep_condel` | added | +| `--condel_config` | added | +| `--vep_mastermind` | added | +| `--mastermind_file` | added | +| `--mastermind_mutations` | added | +| `--mastermind_var_iden` | added | +| `--mastermind_url` | added | +| `--vep_phenotypes` | added | +| `--phenotypes_file` | added | +| `--phenotypes_file_tbi` | added | +| `--phenotypes_include_types` | added | +| `--tools snpsift` | added | +| `--snpsift_databases` | added | +| `--msisensor2_scan` | removed | + +### Developer section + +#### Added + +#### Changed + +- [#2076](https://github.com/nf-core/sarek/pull/2076) - Back to Dev +- [#2098](https://github.com/nf-core/sarek/pull/2098) - Starting workflow output migration with multiqc +- [#2101](https://github.com/nf-core/sarek/pull/2101) - Prepare release 3.8.0 +- [#2126](https://github.com/nf-core/sarek/pull/2126) - Start versions migration to topics + +#### Fixed + +- [#2099](https://github.com/nf-core/sarek/pull/2099) - Fix `bbsplit.nf.test` input cardinality to match current PREPARE_GENOME subworkflow signature (31 parameters) +- 
[#2112](https://github.com/nf-core/sarek/pull/2112) - Fix BBSplit index building failure by adding null check for reads in `ext.prefix` configuration +- [#2120](https://github.com/nf-core/sarek/pull/2120) - Fix BBSplit index publish pattern from `bbmap` to `bbmap_index` to correctly save reference +- [#2104](https://github.com/nf-core/sarek/pull/2104) - Ignore warnings coming from singularity and conda +- [#2105](https://github.com/nf-core/sarek/pull/2105) - Ignore warnings coming from singularity +- [#2106](https://github.com/nf-core/sarek/pull/2106) - Fix conda setup in GHA + +#### Removed + +- [#2084](https://github.com/nf-core/sarek/pull/2084) - Removed no longer used old test profiles + +## [3.7.1](https://github.com/nf-core/sarek/releases/tag/3.7.1) - Buollámtjåhkka + +Buollámtjåhkka is the closest mountain to Saltoluokta and an easy peak to climb. + +### Added + +### Changed + +- [#2073](https://github.com/nf-core/sarek/pull/2073) - Update MultiQC to version 1.33 + +### Fixed + +- [#2069](https://github.com/nf-core/sarek/pull/2069) - Propagate tbi indices for HaplotypeCaller and Haplotyper to fix bug with merging vcfs and tbis. 
+- [#2071](https://github.com/nf-core/sarek/pull/2071) - Updated parameter validation to support the use of umi_read_structure along with umi_in_read_headers +- [#2073](https://github.com/nf-core/sarek/pull/2073) - Apply fix for BBSplit error: `unterminated s' command` +- [#2124](https://github.com/nf-core/sarek/pull/2124) - Fix FilterMutectCalls not running when starting from BAM with lane field in samplesheet + +### Removed + +### Dependencies + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| multiqc | 1.31 | 1.33 | + +### Parameters + +| Params | status | +| ------ | ------ | + +### Developer section + +#### Added + +- [#2144](https://github.com/nf-core/sarek/pull/2144) - Add developer guidelines document for human and AI contributors + +#### Changed + +- [#2067](https://github.com/nf-core/sarek/pull/2067) - Back to Dev + +#### Fixed + +#### Removed + +## [3.7.0](https://github.com/nf-core/sarek/releases/tag/3.7.0) - Saltoluokta + +Saltoluokta is a mountain lodge located in northern Sweden and a popular starting point for trips into Sarek National Park. + +This release includes a bump to Nextflow 25.10.2. 
+ +### Added + +- [#2044](https://github.com/nf-core/sarek/pull/2044) - Add filtering with `bcftools view -f PASS,.` following variantcalling step +- [#2049](https://github.com/nf-core/sarek/pull/2049) - Add consensus calling of small variant VCFs for variants called by x or more tools, with `x=2` as default + +### Changed + +- [#2045](https://github.com/nf-core/sarek/pull/2045) - Propagate fastp shard naming if exists through BBSplit to ensure unique naming in Markduplicates +- [#2065](https://github.com/nf-core/sarek/pull/2065) - Bump minimal Nextflow version to 25.10.2 + +### Fixed + +- [#2045](https://github.com/nf-core/sarek/pull/2045) - Propagate fastp shard naming if exists through BBSplit to ensure unique naming in Markduplicates +- [#2060](https://github.com/nf-core/sarek/pull/2060) - Update bbmap/bbsplit module to fix bbsplit index staging by using symlinks instead of full copy + +### Removed + +### Dependencies + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| nf-schema | 2.4.2 | 2.6.1 | + +### Parameters + +| Params | status | +| ---------------------------- | ------ | +| `--filter_vcfs` | New | +| `--bcftools_filter_criteria` | New | +| `--snv_consensus_calling` | New | +| `--consensus_min_count` | New | + +### Developer section + +#### Added + +#### Changed + +- [#1979](https://github.com/nf-core/sarek/pull/1979) - Update prepare_genome subworkflow to adhere to language server +- [#2041](https://github.com/nf-core/sarek/pull/2041) - Back to dev +- [#2043](https://github.com/nf-core/sarek/pull/2043) - Refactor postvariantcalling and split out varlociraptor from other options +- [#2048](https://github.com/nf-core/sarek/pull/2048) - No null value for snpeff_cache and vep_cache in tests +- [#2058](https://github.com/nf-core/sarek/pull/2058) - Template update for nf-core/tools v3.5.1 +- [#2080](https://github.com/nf-core/sarek/pull/2080) - Improve test suite and capture stdout/err logs more efficiently + +#### 
Fixed + +- [#2053](https://github.com/nf-core/sarek/pull/2053) - Change yte input to one channel to disambiguate scenario file rendering +- [#2054](https://github.com/nf-core/sarek/pull/2054) - Fix typo on tbi_sentieon_dnascope channel +- [#2065](https://github.com/nf-core/sarek/pull/2065) - Bump nf-schema to 2.6.1, due to [nf-schema#181](https://github.com/nextflow-io/nf-schema/issues/181) + +#### Removed + +## [3.6.1](https://github.com/nf-core/sarek/releases/tag/3.6.1) - Sjnjierák + +Sjnjierák is a popular stopover cabin on the way into the park. + +This patch release includes a bump to Nextflow 25.04.8. + +### Added + +### Changed + +### Fixed + +- [2029](https://github.com/nf-core/sarek/pull/2029) - Correct intervals channel for parabricks + +### Removed + +### Dependencies + +| Dependency | Old version | New version | +| ------------ | ----------- | ----------- | +| `parabricks` | 4.5.1-1 | 4.6.0-1 | + +### Parameters + +| Params | status | +| ------ | ------ | + +### Developer section + +#### Added + +#### Changed + +- [2026](https://github.com/nf-core/sarek/pull/2026) - Back to dev +- [2038](https://github.com/nf-core/sarek/pull/2038) - Improve customising parabricks parameters docs + +#### Fixed + +#### Removed + +## [3.6.0](https://github.com/nf-core/sarek/releases/tag/3.6.0) - Kvikkjokk + +Kvikkjokk is a village where many trails start that lead through sarek. 
+ +### Added + +- [1682](https://github.com/nf-core/sarek/pull/1682), [1770](https://github.com/nf-core/sarek/pull/1770) - Add `bcftools_norm` in `POST_VARIANTCALLING` for normalization of all vcf files +- [1744](https://github.com/nf-core/sarek/pull/1744) - Add MuSE as new somatic variant caller +- [1804](https://github.com/nf-core/sarek/pull/1840) - Add parabricks/fq2bam as alternative to fastq_preprocess_gatk +- [1817](https://github.com/nf-core/sarek/pull/1817) - Added new contributor +- [1841](https://github.com/nf-core/sarek/pull/1841) - Add pcr-indel-model parameter for GATK HaplotypeCaller +- [1848](https://github.com/nf-core/sarek/pull/1848) - Add parameter for setting pixel distance for GATK MarkDuplicates +- [1856](https://github.com/nf-core/sarek/pull/1856) - Added early failure when more than 1 normal sample per patient is provided for somatic variant calling +- [1904](https://github.com/nf-core/sarek/pull/1904) - Icon support +- [1934](https://github.com/nf-core/sarek/pull/1934) - Add sentieon TNscope for tumour/normal variant calling +- [1937](https://github.com/nf-core/sarek/pull/1937) - Add UMI extraction for deduplication using fastp via `--umi_location`, `--umi_length` and `--umi_base_skip` +- [1937](https://github.com/nf-core/sarek/pull/1937) - Add consensus read generation when using sentieon dedup via `--sentieon_consensus` +- [1937](https://github.com/nf-core/sarek/pull/1937) - Add support for UMIs in read headers via `--umi_in_read_header` +- [1937](https://github.com/nf-core/sarek/pull/1937) - Use fgbio plugin to check read structure parameter is valid +- [1938](https://github.com/nf-core/sarek/pull/1938) - Add checks for uniqueness of sample ids and lane ids +- [1939](https://github.com/nf-core/sarek/pull/1939) - Modify config to add readgroups to fq2bam +- [1940](https://github.com/nf-core/sarek/pull/1940) - Add varlociraptor for variant calling +- [1953](https://github.com/nf-core/sarek/pull/1953) - Update freebayes and add QUAL 
filtering +- [1965](https://github.com/nf-core/sarek/pull/1965),[2017](https://github.com/nf-core/sarek/pull/2017) - Add msisensor2 +- [1973](https://github.com/nf-core/sarek/pull/1973) - Update bcftools/annotate to pick up columns file input +- [2021](https://github.com/nf-core/sarek/pull/2021),[1983](https://github.com/nf-core/sarek/pull/1983) - Add BBsplit + +### Changed + +- [1682](https://github.com/nf-core/sarek/pull/1682) - Edit vcf_concatenate_germline subworkflow +- [1810](https://github.com/nf-core/sarek/pull/1810) - Move non-informative information in the CHANGELOG for the end user to its own Developer section +- [1890](https://github.com/nf-core/sarek/pull/1890) - Improve and update metro map +- [1903](https://github.com/nf-core/sarek/pull/1903) - Double the default `time` for all processes +- [1922](https://github.com/nf-core/sarek/pull/1922) - Update ASCAT module to v3.2.0 +- [1937](https://github.com/nf-core/sarek/pull/1937) - Swapped from samblaster to samtools within the fgbio consensus generation +- [1961](https://github.com/nf-core/sarek/pull/1961) - Update ensemblvep cache version to 114 +- [1961](https://github.com/nf-core/sarek/pull/1961) - Update ensemblvep cache version to 115 +- [1961](https://github.com/nf-core/sarek/pull/1961) - Update ensemblvep modules to 114.2 +- [1964](https://github.com/nf-core/sarek/pull/1964) - Update ensemblvep modules to 115.0 +- [1982](https://github.com/nf-core/sarek/pull/1982) - Update ensemblvep modules to 115.1 +- [1988](https://github.com/nf-core/sarek/pull/1988) - Update manta modules to latest build +- [1993](https://github.com/nf-core/sarek/pull/1993) - Update snpeff modules to 5.3.0a and ensemblvep modules to 115.2 +- [2003](https://github.com/nf-core/sarek/pull/2003),[2005](https://github.com/nf-core/sarek/pull/2005) - Update subway map to clarify post-variantcalling processing options, and add BBSplit +- [2009](https://github.com/nf-core/sarek/pull/2009) - Update multiqc module to 1.31 +- 
[2011](https://github.com/nf-core/sarek/pull/2011) - Downgrade snpeff db to at least 99 due to 105 not being available at the moment + +### Fixed + +- [1842](https://github.com/nf-core/sarek/pull/1842) - Updated the input validation of the pipeline to be more strict, thus preventing more issues when running the pipeline +- [1849](https://github.com/nf-core/sarek/pull/1849) - Fix bug in sample_lane_id definition in addReadgroupToMeta function +- [1858](https://github.com/nf-core/sarek/pull/1858) - Fix bug in parameter validation +- [1896](https://github.com/nf-core/sarek/pull/1896) - Add information on gatk_spark and save_output_as_bam +- [1928](https://github.com/nf-core/sarek/pull/1928) - Fix cnvkit when using --no_intervals, and correct cpu allocation +- [1937](https://github.com/nf-core/sarek/pull/1937) - Individual lanes are now merged together before fgbio consensus generation is performed +- [1992](https://github.com/nf-core/sarek/pull/1992) - Add `--sample_name` argument to deepvariant when `meta.sample` is available + +### Removed + +- [1806](https://github.com/nf-core/sarek/pull/1806) - Remove some files publication with the --concatenate_vcfs options + +### Dependencies + +| Dependency | Old version | New version | +| -------------------------------------- | ----------- | ----------- | +| `ascat` | 3.1.1 | 3.2.0 | +| `bcftools` | 1.20 | 1.21 | +| `deepvariant` | 1.8.0 | 1.9.0 | +| `ensemblvep` | 113.0 | 115.2 | +| `gawk` | 5.1.0 | 5.3.0 | +| `fastp` | 0.23.4 | 0.24.0 | +| `fgbio` | 2.2.1 | 2.4.0 | +| `freebayes` | 1.3.6 | 1.3.10 | +| `gatk4` | 4.5.0.0 | 4.6.1.0 | +| `mosdepth` | 0.3.8 | 0.3.10 | +| `msisensor2` | | 0.1 | +| `msisensorpro` | 1.2.0 | 1.3.0 | +| `multiqc` | 1.25.1 | 1.31 | +| `muse` | | 2.1.2 | +| `parabricks` | | 4.5.1-1 | +| `rbt` | | 0.42.2 | +| `samblaster` | 0.1.26 | removed | +| `samtools` (in `BWAMEM1_MEM`) | 1.2 | 1.21 | +| `samtools` (in `BWAMEM2_MEM`) | 1.19.2 | 1.21 | +| `samtools` (in `GATK4_MARKDUPLICATES`) | 1.19.2 | 1.21 | +| 
`sentieon` | 202308.03 | 202503.01 | +| `snpeff` | 5.1 | 5.3a | +| `tabix` | 1.2 | 1.21 | +| `varlociraptor` | | 8.7.4 | +| `vcflib` | | 1.0.14 | +| `yte` | | 1.9.0 | + +### Parameters + +| Params | status | +| ------------------------------------- | ------ | +| `--freebayes_filter` | New | +| `--msisensor2_models` | New | +| `--msisensorpro_scan` | New | +| `--sentieon_consensus` | New | +| `--umi_base_skip` | New | +| `--umi_in_read_header` | New | +| `--umi_length` | New | +| `--umi_location` | New | +| `--umi_tag` | New | +| `--varlociraptor_chunk_size` | New | +| `--varlociraptor_scenario_germline` | New | +| `--varlociraptor_scenario_somatic` | New | +| `--varlociraptor_scenario_tumor_only` | New | + +### Developer section + +#### Added + +- [1803](https://github.com/nf-core/sarek/pull/1803) - Back to dev +- [1806](https://github.com/nf-core/sarek/pull/1806) - Use `nft-vcf` for nf-test vcf assertions +- [1814](https://github.com/nf-core/sarek/pull/1814) - Added link to Bluesky +- [1815](https://github.com/nf-core/sarek/pull/1815) - Create nf-test pipeline vcf concatenation + normalize tests +- [1829](https://github.com/nf-core/sarek/pull/1829) - Add muse as variant caller to images +- [1835](https://github.com/nf-core/sarek/pull/1835) - Add GPU testing possibilities +- [1855](https://github.com/nf-core/sarek/pull/1855) - Add contributors info to the contributors field in the manifest +- [1922](https://github.com/nf-core/sarek/pull/1922) - Added tests for ASCAT + +#### Changed + +- [1761](https://github.com/nf-core/sarek/pull/1761) - Skip nf-test on docs changes +- [1806](https://github.com/nf-core/sarek/pull/1806) - Migrate pipeline pytest vcf concatenation tests to nf-test +- [1809](https://github.com/nf-core/sarek/pull/1809) - Replace `getReadsMD5()` by `readsMD5` from `nft-bam` plugin for more global cohesion with usage of `nft-vcf` plugin +- [1810](https://github.com/nf-core/sarek/pull/1810) - Implement automatic sharding for nf-test tests +- 
[1810](https://github.com/nf-core/sarek/pull/1810) - Skip all CI but linting on docs changes +- [1812](https://github.com/nf-core/sarek/pull/1812) - Move gatk based preprocessing to local subworkflow +- [1815](https://github.com/nf-core/sarek/pull/1815) - Migrate pipeline pytest vcf normalize tests to nf-test +- [1819](https://github.com/nf-core/sarek/pull/1819) - Migrate pipeline pytest tiddit tests to nf-test +- [1820](https://github.com/nf-core/sarek/pull/1820) - Migrate pipeline pytest manta tests to nf-test +- [1821](https://github.com/nf-core/sarek/pull/1821) - Migrate pipeline pytest freebayes tests to nf-test +- [1825](https://github.com/nf-core/sarek/pull/1825) - Migrate pipeline pytest cnvkit tests to nf-test +- [1826](https://github.com/nf-core/sarek/pull/1826) - Migrate pipeline pytest mpileup tests to nf-test +- [1827](https://github.com/nf-core/sarek/pull/1827) - Migrate pipeline pytest haplotypecaller tests to nf-test +- [1828](https://github.com/nf-core/sarek/pull/1828) - Migrate pipeline pytest lofreq tests to nf-test +- [1831](https://github.com/nf-core/sarek/pull/1831) - Migrate pipeline pytest fastp tests to nf-test +- [1832](https://github.com/nf-core/sarek/pull/1832) - Update all annotation related modules and subworkflows +- [1847](https://github.com/nf-core/sarek/pull/1847) - Runs on custom runners :rocket: thanks to [RunsOn](https://runs-on.com/) +- [1852](https://github.com/nf-core/sarek/pull/1852) - Ignore tests from modules +- [1852](https://github.com/nf-core/sarek/pull/1852) - Improve some nf-test tests +- [1866](https://github.com/nf-core/sarek/pull/1866) - Migrate pipeline pytest deepvariant tests to nf-test +- [1867](https://github.com/nf-core/sarek/pull/1867) - Migrate pipeline pytest gatk4spark tests to nf-test +- [1868](https://github.com/nf-core/sarek/pull/1868) - Migrate pipeline pytest intervals tests to nf-test +- [1871](https://github.com/nf-core/sarek/pull/1871) - Update all modules +- 
[1874](https://github.com/nf-core/sarek/pull/1874) - Migrate pipeline pytest joint_calling haplotypecaller tests to nf-test +- [1874](https://github.com/nf-core/sarek/pull/1874) - Migrate pipeline pytest joint_calling mutect2 tests to nf-test +- [1874](https://github.com/nf-core/sarek/pull/1874) - Migrate pipeline pytest mutect2 tests to nf-test +- [1874](https://github.com/nf-core/sarek/pull/1874) - More global cohesion in all the nf-test tests +- [1876](https://github.com/nf-core/sarek/pull/1876) - Migrate pipeline pytest ngscheckmate tests to nf-test +- [1877](https://github.com/nf-core/sarek/pull/1877) - Migrate pipeline pytest msisensorpro tests to nf-test +- [1878](https://github.com/nf-core/sarek/pull/1878) - Migrate pipeline pytest umi tests to nf-test +- [1879](https://github.com/nf-core/sarek/pull/1879) - Template update for nf-core/tools v3.2.1 +- [1892](https://github.com/nf-core/sarek/pull/1892) - Make jobs automatically resubmit for exit code 175 +- [1917](https://github.com/nf-core/sarek/pull/1917) - stub tests have stub tag +- [1927](https://github.com/nf-core/sarek/pull/1927) - Migrate pipeline pytest sentieon tests to nf-test +- [1932](https://github.com/nf-core/sarek/pull/1932) - Refactor and simplify pipeline test suite +- [1936](https://github.com/nf-core/sarek/pull/1936) - Template update for nf-core/tools v3.3.2 +- [1953](https://github.com/nf-core/sarek/pull/1953) - Change freebayes tests from gzip to md5sum +- [1954](https://github.com/nf-core/sarek/pull/1954) - Refactor bcftools annotation subworkflows so that no diff is necessary from nf-core/modules +- [1962](https://github.com/nf-core/sarek/pull/1962) - Update gatk and gatk4spark applybqsr modules and subsequent subworkflows to deal with the newly added ext.suffix +- [1965](https://github.com/nf-core/sarek/pull/1965) - Refactor cram to bam conversion which is now done even more upstream (following [1967](https://github.com/nf-core/sarek/pull/1967)) +- 
[1967](https://github.com/nf-core/sarek/pull/1967) - Refactor muse subworkflows so that cram to bam conversion is done upstream +- [1967](https://github.com/nf-core/sarek/pull/1967) - Update msisensorpro modules to adhere to language server +- [1970](https://github.com/nf-core/sarek/pull/1970) - Update controlfreec modules to adhere to language server +- [1975](https://github.com/nf-core/sarek/pull/1975) - Now fails when no dbnsfp_tbi is provided when dbnsfp is +- [1977](https://github.com/nf-core/sarek/pull/1977) - Update sentieon modules to adhere to language server +- [1985](https://github.com/nf-core/sarek/pull/1985) - Update subway maps and workflow pictures to include msisensor2 following [1965](https://github.com/nf-core/sarek/pull/1965) +- [1990](https://github.com/nf-core/sarek/pull/1990) - Update parabricks/fq2bam to 4.5.1-1 +- [2001](https://github.com/nf-core/sarek/pull/2001) - Remove lofreq VCF MD5sum - use summary +- [2007](https://github.com/nf-core/sarek/pull/2007) - Sort tools in test_full configs and add msisensor2 +- [2008](https://github.com/nf-core/sarek/pull/2008), [2010](https://github.com/nf-core/sarek/pull/2010) - Skip conda tests that cannot be run due to lacking dependencies +- [2014](https://github.com/nf-core/sarek/pull/2014) - Prepare release and address comments + +#### Fixed + +- [1806](https://github.com/nf-core/sarek/pull/1806) - Fix some nf-test assertions +- [1809](https://github.com/nf-core/sarek/pull/1809) - Deals with nf-test snapshotting empty lists in a better way (https://github.com/nf-core/sarek/issues/1807) +- [1814](https://github.com/nf-core/sarek/pull/1814) - Fix link to GHA CI broken by [1810](https://github.com/nf-core/sarek/pull/1810) +- [1845](https://github.com/nf-core/sarek/pull/1845) - Modifying `.nftignore` should retrigger nf-test (cf https://github.com/nf-core/tools/pull/3508) +- [1852](https://github.com/nf-core/sarek/pull/1852) - Fix path to `license_message.py` script +- 
[1852](https://github.com/nf-core/sarek/pull/1852) - Modifying `assets/schema_input.json` and `nextflow_schema.json` should retrigger nf-test (cf https://github.com/nf-core/sarek/pull/1842) +- [1855](https://github.com/nf-core/sarek/pull/1855) - Fix json schema cf_chrom_len input broken by [1842](https://github.com/nf-core/sarek/pull/1842) +- [1859](https://github.com/nf-core/sarek/pull/1859) - Fix: change dbsnp channel from queue to value in muse subworkflow, wrongly implemented in [1744](https://github.com/nf-core/sarek/pull/1744) +- [1899](https://github.com/nf-core/sarek/pull/1899) - Ensure nf-test runs for all profiles on release +- [1917](https://github.com/nf-core/sarek/pull/1917) - Ensure all versions and reports are reported to MultiQC +- [1927](https://github.com/nf-core/sarek/pull/1927) - Fixed Sentieon variant calling broken by [1871](https://github.com/nf-core/sarek/pull/1871) +- [1930](https://github.com/nf-core/sarek/pull/1930) - Fixed tests when no sentieon license/ENV are provided +- [1932](https://github.com/nf-core/sarek/pull/1932) - Fix typo in UMI warning message +- [1933](https://github.com/nf-core/sarek/pull/1933) - Correct link in README +- [1935](https://github.com/nf-core/sarek/pull/1935) - Fix bug in samplesheet_to_channel workflow due to bad integer handling with lane +- [1984](https://github.com/nf-core/sarek/pull/1984) - Fix tuple mismatch in GATK_BASERECALIBRATOR +- [1996](https://github.com/nf-core/sarek/pull/1996) - MuSE: change to htslib for bgzip and tabix & language server +- [1998](https://github.com/nf-core/sarek/pull/1998) - Adjust VEP versions in snap files with corrected module +- [2002](https://github.com/nf-core/sarek/pull/2002) - Update strelka to specific build to fix error in somatic mode +- [2013](https://github.com/nf-core/sarek/pull/2013) - Fix snaps after MultiQC update +- [2019](https://github.com/nf-core/sarek/pull/2019) - Allow other cloud buckets than `s3://annotation-cache/` for the VEP and snpEff cache + 
+#### Removed + +- [1814](https://github.com/nf-core/sarek/pull/1814) - Removed link to Twitter/X +- [1884](https://github.com/nf-core/sarek/pull/1884) - Remove pytest-workflow from CI (copied from [1729](https://github.com/nf-core/sarek/pull/1729)) +- [1956](https://github.com/nf-core/sarek/pull/1956) - Remove tests for downloading cache (Snpeff and VEP) (we already have tests for the cache in the modules) +- [1957](https://github.com/nf-core/sarek/pull/1957) - Remove duplicated gpu profile from config +- [1994](https://github.com/nf-core/sarek/pull/1994) - Prevent conda profile running on gpu +- [1995](https://github.com/nf-core/sarek/pull/1995) - Remove md5sums of png files for controlfreec +- [1997](https://github.com/nf-core/sarek/pull/1997) - Remove png and pdf md5sums for all variant callers +- [2004](https://github.com/nf-core/sarek/pull/2004) - Remove .cram.metrics and .cram.metrics.multiqc.tsv md5sums for sentieon + +## [3.5.1](https://github.com/nf-core/sarek/releases/tag/3.5.1) - Akkatjåkkå + +Akkatjåkkå is another glacier. 
+ +### Added + +- [1759](https://github.com/nf-core/sarek/pull/1759) - Back to dev + +### Changed + +- [1767](https://github.com/nf-core/sarek/pull/1767) - Bump nf-schema version to 2.2.1 +- [1777](https://github.com/nf-core/sarek/pull/1777) - Merge intervals for WES in GATK GenomicsDBImport +- [1798](https://github.com/nf-core/sarek/pull/1798) - Prepare release 3.5.1 + +### Fixed + +- [1797](https://github.com/nf-core/sarek/pull/1797) - Use `file-path-pattern` over `file-path` to handle glob for `known_indels` and `known_indels_tbi` to fix [1785](https://github.com/nf-core/sarek/issues/1785) +- [1802](https://github.com/nf-core/sarek/pull/1802) - Update GHA for full_test tests + +### Removed + +### Dependencies + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | + +### Parameters + +| Params | Status | +| ------ | ------ | + +## [3.5.0](https://github.com/nf-core/sarek/releases/tag/3.5.0) - Áhkájiegna + +A set of connecting glaciers. + +### Added + +- [1613](https://github.com/nf-core/sarek/pull/1613) - add indexcov +- [1638](https://github.com/nf-core/sarek/pull/1638) - Added additional documentation detailing ASCAT WES usage. 
+- [1640](https://github.com/nf-core/sarek/pull/1620) - Add `lofreq` as a tumor-only variant caller +- [1642](https://github.com/nf-core/sarek/pull/1642) - Back to dev +- [1653](https://github.com/nf-core/sarek/pull/1653) - Updates `sarek_subway` files with `lofreq` +- [1660](https://github.com/nf-core/sarek/pull/1642) - Add `--length_required` for minimal reads length with `FASTP` +- [1663](https://github.com/nf-core/sarek/pull/1663) - Massive conda modules update +- [1664](https://github.com/nf-core/sarek/pull/1664) - Check if flowcell ID matches for read pair +- [1730](https://github.com/nf-core/sarek/pull/1730) - Enable Harshil Alignment™️ in VS Code workspace settings + +### Changed + +- [1579](https://github.com/nf-core/sarek/pull/1579) - Update Sentieon usage docs +- [1635](https://github.com/nf-core/sarek/pull/1635) - Fix docs to reflect variant calling tool - data type correctly +- [1668](https://github.com/nf-core/sarek/pull/1668) - Add nf-test sharding CI +- [1669](https://github.com/nf-core/sarek/pull/1669) - Better nf-test pipeline level tests +- [1677](https://github.com/nf-core/sarek/pull/1677) - Migrate pytest aligner and pipeline default tests to nf-test +- [1680](https://github.com/nf-core/sarek/pull/1680) - Template update for nf-core/tools v3.0.0 +- [1681](https://github.com/nf-core/sarek/pull/1681) - Template update for nf-core/tools v3.0.1 +- [1686](https://github.com/nf-core/sarek/pull/1686) - Template update for nf-core/tools v3.0.2 +- [1692](https://github.com/nf-core/sarek/pull/1692) - Update ensemblvep +- [1695](https://github.com/nf-core/sarek/pull/1695) - Update all modules +- [1707](https://github.com/nf-core/sarek/pull/1707) - Un-hide parameters and clean up Json schema +- [1708](https://github.com/nf-core/sarek/pull/1708) - Migrate pipeline pytest alignment and annotation tests to nf-test +- [1711](https://github.com/nf-core/sarek/pull/1711) - Migrate pipeline pytest strelka tests to nf-test +- 
[1731](https://github.com/nf-core/sarek/pull/1731) - Migrate pipeline pytest controlfreec tests to nf-test + +### Fixed + +- [1624](https://github.com/nf-core/sarek/pull/1624) - Fix channel stalling for bcftools index +- [1657](https://github.com/nf-core/sarek/pull/1657) - Update all actions used in the GHA CI +- [1661](https://github.com/nf-core/sarek/pull/1661) - nf-test pipeline level tests +- [1673](https://github.com/nf-core/sarek/pull/1673) - Print warning message instead of silent error with Nextflow versions prior to 24.08.0edge +- [1693](https://github.com/nf-core/sarek/pull/1693) - Fixes flowcell retrieval during samplesheet parsing +- [1694](https://github.com/nf-core/sarek/pull/1694) - Fix manifest DOI display on CLI +- [1695](https://github.com/nf-core/sarek/pull/1695) - Fix and update input_schema.json +- [1702](https://github.com/nf-core/sarek/pull/1702) - Update nf-schema tests that were not failing on lenient mode +- [1712](https://github.com/nf-core/sarek/pull/1712) - Fix missing import statements on error messages when starting without samplesheet +- [1743](https://github.com/nf-core/sarek/pull/1743) - Add setup java 17 in GHA for latest Nextflow version +- [1745](https://github.com/nf-core/sarek/pull/1745) - Fix bug where workflow can hang if the email parameter is set +- [1746](https://github.com/nf-core/sarek/pull/1746) - Fix Sentieon module inputs +- [1752](https://github.com/nf-core/sarek/pull/1752) - Add `indexcov` and `lofreq` to full size tests. Amend overview figures. 
+- [1754](https://github.com/nf-core/sarek/pull/1754) - Fix test string +- [1755](https://github.com/nf-core/sarek/pull/1755) - Remove `default` channel and name from local modules +- [1757](https://github.com/nf-core/sarek/pull/1757) - Fix Changelog by adding missing new parameters + +### Removed + +- [1656](https://github.com/nf-core/sarek/pull/1656) - Retiring parameter `snpeff_genome` +- [1709](https://github.com/nf-core/sarek/pull/1709) - Remove `Strelka` tumor-only somatic variant calling +- [1728](https://github.com/nf-core/sarek/pull/1728) - Remove BAM to CRAM conversion of input files for post-alignment entry points + +### Dependencies + +| Dependency | Old version | New version | +| ------------- | ----------- | ----------- | +| `coreutils` | 8.30 | 9.5 | +| `deepvariant` | 1.5.0 | 1.6.1 | +| `ensemblvep` | 111.0 | 113.0 | +| `fgbio` | 2.0.2 | 2.1.2 | +| `gawk` | 5.1.0 | 5.3.0 | +| `htslib` | 1.20 | 1.21 | +| `lofreq` | | 2.1.5 | +| `multiqc` | 1.21 | 1.25.1 | +| `samtools` | 1.20 | 1.21 | +| `sentieon` | 202308.02 | 202308.03 | +| `svdb` | 2.8.1 | 2.8.2 | + +### Parameters + +| Params | Status | +| ------------------------------------ | ------- | +| `--help_full` | New | +| `--length_required` | New | +| `--show_hidden` | New | +| `--snpeff_db` | Updated | +| `--snpeff_genome` | Removed | +| `--validationFailUnrecognisedParams` | Removed | +| `--validationLenientMode` | Removed | +| `--validationSchemaIgnoreParams` | Removed | +| `--validationShowHiddenParams` | Removed | + +## [3.4.4](https://github.com/nf-core/sarek/releases/tag/3.4.4) - Ruopsokjåkhå + +Ruopsokjåkhå is another peak of the Pårte massif. 
+ +### Added + +- [1614](https://github.com/nf-core/sarek/pull/1614) - Back to dev +- [1639](https://github.com/nf-core/sarek/pull/1639) - Bump version to prepare release + +### Changed + +- [1627](https://github.com/nf-core/sarek/pull/1627) - Correct tower reports/snpeff format + +### Fixed + +- [1623](https://github.com/nf-core/sarek/pull/1623) - Update docs to clarify vep cache folder organisation +- [1628](https://github.com/nf-core/sarek/pull/1628) - Fix dbsnp channel mapping in germline variant calling subworkflow + +### Removed + +### Dependencies + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | + +### Parameters + +## [3.4.3](https://github.com/nf-core/sarek/releases/tag/3.4.3) - Loametjåhkkå + +Loametjåhkkå is another one of the main peaks of the Pårte massif. + +### Added + +- [#1502](https://github.com/nf-core/sarek/pull/1502) - export CNVs into VCF format in `bam_variant_calling_cnvkit` +- [#1534](https://github.com/nf-core/sarek/pull/1534), [#1573](https://github.com/nf-core/sarek/pull/1573), [#1734](https://github.com/nf-core/sarek/pull/1534) - Handling `.fastq.gz.spring` files as input +- [#1593](https://github.com/nf-core/sarek/pull/1593) - Prepare release `3.4.2` + +### Changed + +- [#1502](https://github.com/nf-core/sarek/pull/1502) - Improved handling of CNVkit reference +- [#1502](https://github.com/nf-core/sarek/pull/1502) - Specific CNV call step, with recommended settings for germline +- [#1508](https://github.com/nf-core/sarek/pull/1508) - Sync `TEMPLATE` with `tools` `2.14.0` +- [#1513](https://github.com/nf-core/sarek/pull/1513) - Back to dev +- [#1518](https://github.com/nf-core/sarek/pull/1518) - Sync `TEMPLATE` with `tools` `2.14.1` +- [#1521](https://github.com/nf-core/sarek/pull/1521) - Minor code refactoring to simplify syntax in args handling +- [#1545](https://github.com/nf-core/sarek/pull/1545) - Update modules +- [#1552](https://github.com/nf-core/sarek/pull/1552) - Update samtools to 
v1.20 +- [#1545](https://github.com/nf-core/sarek/pull/1545) - Update modules +- [#1553](https://github.com/nf-core/sarek/pull/1553) - Update bcftools to v1.20 +- [#1557](https://github.com/nf-core/sarek/pull/1557) - Update ENSEMBLVEP cache to 111 + +### Fixed + +- [#1536](https://github.com/nf-core/sarek/pull/1536) - Correct typo `Strelka2` to `Strelka` +- [#1541](https://github.com/nf-core/sarek/pull/1541) - Getting bam and bai published in the same folder +- [#1542](https://github.com/nf-core/sarek/pull/1542) - Removing legacy configs of `CUSTOM_DUMPSOFTWAREVERSIONS` +- [#1547](https://github.com/nf-core/sarek/pull/1547) - Correct typo in help text in nextflow_schema.json +- [#1556](https://github.com/nf-core/sarek/pull/1556) - Fix display of some commands in `docs/usage.md` +- [#1563](https://github.com/nf-core/sarek/pull/1563) - Fix `vep_cache_path_full` so that `--refseq/--merged` will work for ENSEMBLVEP +- [#1570](https://github.com/nf-core/sarek/pull/1570) - Remove duplicated notes in FASTQC output docs +- [#1596](https://github.com/nf-core/sarek/pull/1596) - Fix haplotypecaller tests +- [#1597](https://github.com/nf-core/sarek/pull/1597) - Fix deepvariant tests +- [#1612](https://github.com/nf-core/sarek/pull/1612) - Remove empty output directories + +### Removed + +### Dependencies + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `bcftools` | 1.18 | 1.20 | +| `bwa` | 0.7.17 | 0.7.18 | +| `cnvkit` | 0.9.10 | 0.9.11 | +| `htslib` | 1.19.1 | 1.20 | +| `samtools` | 1.19.2 | 1.20 | + +### Parameters + +## [3.4.2](https://github.com/nf-core/sarek/releases/tag/3.4.2) - Sájtáristjåhkkå + +Sájtáristjåhkkå is another peak (just under 2k) in the Pårte massif, it is one of the few peaks in Sweden that cannot be climbed without proper climbing equipment. 
+ +### Added + +- [#1489](https://github.com/nf-core/sarek/pull/1489) - Added a `testdata.nf-core.sarek` key in `conf/igenomes.config` for small reference +- [#1493](https://github.com/nf-core/sarek/pull/1493) - Added a `wave` profile +- [#1498](https://github.com/nf-core/sarek/pull/1498) - Prepare release `3.4.2` + +### Changed + +- [#1477](https://github.com/nf-core/sarek/pull/1477) - Back to dev +- [#1482](https://github.com/nf-core/sarek/pull/1482) - Pin `nf-prov` plugin to `1.2.2` +- [#1485](https://github.com/nf-core/sarek/pull/1485) - Update citation for publication +- [#1487](https://github.com/nf-core/sarek/pull/1487) - Update sentieon-modules to Sentieon `202308.02` +- [#1490](https://github.com/nf-core/sarek/pull/1490) - Update mosdepth to `0.3.8` +- [#1505](https://github.com/nf-core/sarek/pull/1505) - Update CITATIONS.md +- [#1506](https://github.com/nf-core/sarek/pull/1506) - Fixing typos (`index_alignement` -> `index_alignment`) +- [#1509](https://github.com/nf-core/sarek/pull/1509) - Update contributors + +### Fixed + +- [#1378](https://github.com/nf-core/sarek/pull/1378) - Improve cloud tests launch workflow to use matrix +- [#1488](https://github.com/nf-core/sarek/pull/1488) - Fixing call to `GATK4_HAPLOTYPECALLER` and thereby also the test-profile `test_full_germline` +- [#1494](https://github.com/nf-core/sarek/pull/1494) - Fix Cloud Storage objects are immutable on GCP [#1491](https://github.com/nf-core/sarek/issues/1491) +- [#1496](https://github.com/nf-core/sarek/pull/1496) - Fix multiple DOI handling in manifest +- [#1499](https://github.com/nf-core/sarek/pull/1499) - Remove all md5sum for mosdepth tests +- [#1499](https://github.com/nf-core/sarek/pull/1499) - Add mosdepth dependency to all tests running it +- [#1501](https://github.com/nf-core/sarek/pull/1501) - Remove string "None" param option from ascat_genome + +### Removed + +- [#1489](https://github.com/nf-core/sarek/pull/1489) - Remove `test_cache` profile + +### Dependencies + +| 
Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `mosdepth` | 0.3.6 | 0.3.8 | +| `sentieon` | 202308.01 | 202308.02 | + +### Modules / Subworkflows + +### Parameters + +## [3.4.1](https://github.com/nf-core/sarek/releases/tag/3.4.1) - Balgattjåhkkå + +Balgattjåhkkå is the other top peak (over 2k m) in the Pårte massif, the other one being Pårtetjåkko (Bårddetjåhkkå). + +### Added + +- [#1272](https://github.com/nf-core/sarek/pull/1272) - Add integration with NCBench: Automatic submission of latest benchmarking runs +- [#1333](https://github.com/nf-core/sarek/pull/1333) - Back to dev +- [#1335](https://github.com/nf-core/sarek/pull/1335) - Add index computation of `bcftools_annotations`, if not provided +- [#1340](https://github.com/nf-core/sarek/pull/1340) - Adds Azure test profiles and megatests +- [#1372](https://github.com/nf-core/sarek/pull/1372) - Add NCBench test profile for Agilent datasets +- [#1409](https://github.com/nf-core/sarek/pull/1409) - Add params `modules_testdata_base_path` to test profile + +### Changed + +- [#1339](https://github.com/nf-core/sarek/pull/1339), [#1401](https://github.com/nf-core/sarek/pull/1401) - Update sentieon-modules to Sentieon `202308.01` and adding support for running Sentieon with Conda and Apptainer +- [#1344](https://github.com/nf-core/sarek/pull/1344) - Enable CRAM QC, when starting from variantcalling +- [#1359](https://github.com/nf-core/sarek/pull/1359) - Removing params usage from local modules +- [#1359](https://github.com/nf-core/sarek/pull/1359) - Removing params usage from local subworkflows +- [#1360](https://github.com/nf-core/sarek/pull/1360) - Sync `TEMPLATE` with `tools` `2.11` +- [#1385](https://github.com/nf-core/sarek/pull/1385), [#1436](https://github.com/nf-core/sarek/pull/1436) - Sync `TEMPLATE` with `tools` `2.12` +- [#1408](https://github.com/nf-core/sarek/pull/1408), [#1412](https://github.com/nf-core/sarek/pull/1412) - Updating samtools to v1.19.2 - except in 
GATK/markduplicates. (Temporarily disabled nf-test for bwamem2/mem.) +- [#1411](https://github.com/nf-core/sarek/pull/1411) - Temporarily disable sentieon related tests +- [#1414](https://github.com/nf-core/sarek/pull/1414) - Sync `TEMPLATE` with `tools` `2.13` +- [#1419](https://github.com/nf-core/sarek/pull/1419) - Updating GATK to v4.5, and updating samtools to v1.19.2 in GATK/markduplicates +- [#1426](https://github.com/nf-core/sarek/pull/1426) - Updating certain modules in order to fix the testdata-path in the nf-tests of those modules. Setting Docker runOptions for params.use_gatk_spark +- [#1428](https://github.com/nf-core/sarek/pull/1428) - Sync `TEMPLATE` with `tools` `2.13.1` +- [#1422](https://github.com/nf-core/sarek/pull/1422) - Refactoring following `TEMPLATE` sync with `tools` `2.13` +- [#1431](https://github.com/nf-core/sarek/pull/1431) - Using docker.containerOptions instead of docker.runOptions. Clearing containerOptions for SPARK modules for any kind of supported container engine +- [#1439](https://github.com/nf-core/sarek/pull/1439) - Replacing the local module `BUILD_INTERVALS` with the nf-core module `GAWK` +- [#1456](https://github.com/nf-core/sarek/pull/1456), [#1472](https://github.com/nf-core/sarek/pull/1472), [#1473](https://github.com/nf-core/sarek/pull/1473) - Revert usage of docker.runOptions. 
Add an empty docker.runOptions when using the new `spark` profile +- [#1457](https://github.com/nf-core/sarek/pull/1457) - Update all modules +- [#1466](https://github.com/nf-core/sarek/pull/1466) - Update `VEP` + +### Fixed + +- [#1334](https://github.com/nf-core/sarek/pull/1334) - Remove extra v, when reporting tower runs on slack +- [#1335](https://github.com/nf-core/sarek/pull/1335) - Add docs and validation for bcftools annotation parameters +- [#1345](https://github.com/nf-core/sarek/pull/1345) - Preserve STDERR for easier debugging +- [#1351](https://github.com/nf-core/sarek/pull/1351) - Fix params name for test profiles (`bcftools_annotations`) +- [#1357](https://github.com/nf-core/sarek/pull/1364) - Fixed bug where samples were dropped while reconstituting BAM files +- [#1373](https://github.com/nf-core/sarek/pull/1373) - Add `chr` prefix to NCBench bed file & enable trimming +- [#1381](https://github.com/nf-core/sarek/pull/1381) - Swap NGSCheckMate bed file for GATK.GRCh37 to one without the `chr` prefix +- [#1383](https://github.com/nf-core/sarek/pull/1383) - Fix `--three_prime_clip_r{1,2}` parameter documentation +- [#1390](https://github.com/nf-core/sarek/pull/1390) - Fix badges in README +- [#1400](https://github.com/nf-core/sarek/pull/1400) - Fixed input channel for ASSESS_SIGNIFICANCE module, updated makegraph to makegraph2 +- [#1403](https://github.com/nf-core/sarek/pull/1403) - Fix intervals usage with dot in chromosome names +- [#1407](https://github.com/nf-core/sarek/pull/1407) - Fix CI tests name +- [#1420](https://github.com/nf-core/sarek/pull/1420) - Make `-a` a default argument for `bcftools` concat +- [#1422](https://github.com/nf-core/sarek/pull/1422) - Fix `Cannot serialize context map` warning +- [#1462](https://github.com/nf-core/sarek/pull/1462) - Fix ascat input channels +- [#1463](https://github.com/nf-core/sarek/pull/1463) - Add `spark` profile to all gatk4spark tests +- [#1465](https://github.com/nf-core/sarek/pull/1465), 
[#1469](https://github.com/nf-core/sarek/pull/1469) - Fix input channels and tests of Sentieon workflows +- [#1470](https://github.com/nf-core/sarek/pull/1470) - Fix channels for `MultiQC` +- [#1471](https://github.com/nf-core/sarek/pull/1471) - Add `snpeff_db` params to `validationSchemaIgnoreParams` to fix issues with Seqera Platform +- [#1471](https://github.com/nf-core/sarek/pull/1471) - Add `vep_cache_version` params to `validationSchemaIgnoreParams` to fix [#1454](https://github.com/nf-core/sarek/issues/1454) +- [#1471](https://github.com/nf-core/sarek/pull/1471) - Update `vep_version` params match the actual tool version +- [#1472](https://github.com/nf-core/sarek/pull/1472) - Cast `snpeff_db` params as a string to fix issues with Seqera Platform, as [#1471](https://github.com/nf-core/sarek/pull/1471) was not working as expected +- [#1472](https://github.com/nf-core/sarek/pull/1472) - Load `spark` profile last to avoid issues with `test` profiles + +### Removed + +- [#1405](https://github.com/nf-core/sarek/pull/1405) - Removing docker.userEmulation + +### Dependencies + +| Dependency | Old version | New version | +| ------------ | ----------- | ----------- | +| `bcftools` | 1.17 | 1.18 | +| `ensemblvep` | 110.0 | 111.0 | +| `fgbio` | 2.0.2 | 2.1.0 | +| `gatk` | 4.4.0.0 | 4.5.0.0 | +| `gatk-spark` | 4.4.0.0 | 4.5.0.0 | +| `mosdepth` | 0.3.3 | 0.3.6 | +| `multiqc` | 1.17 | 1.18 | +| `samtools` | 1.17 | 1.19.2 | + +### Modules / Subworkflows + +| script | Old name | New name | +| ------ | -------- | -------- | + +### Parameter + +| Old name | New name | +| ---------------------------- | -------------------------- | +| `bcftools_annotations_index` | `bcftools_annotations_tbi` | + +## [3.4.0](https://github.com/nf-core/sarek/releases/tag/3.4.0) - Pårtetjåkko + +Pårtetjåkko is a mountain in the south of the park. 
+ +### Added + +- [#1113](https://github.com/nf-core/sarek/pull/1113) - Adding CNVkit genemetrics module +- [#1193](https://github.com/nf-core/sarek/pull/1193) - Adding support for Sentieon's DnaScope for germline variant-calling including joint-germline +- [#1244](https://github.com/nf-core/sarek/pull/1244) - Add bcf annotate module +- [#1252](https://github.com/nf-core/sarek/pull/1252) - Added NGSCheckMate tool for checking that samples come from the same individual +- [#1271](https://github.com/nf-core/sarek/pull/1271) - Back to dev +- [#1288](https://github.com/nf-core/sarek/pull/1288) - Add nf-test continuous integration (but no tests) +- [#1290](https://github.com/nf-core/sarek/pull/1290) - Add nf-test for whole pipeline + +### Changed + +- [#1278](https://github.com/nf-core/sarek/pull/1278) - Hide sentieon parameters similar to other variant callers +- [#1280](https://github.com/nf-core/sarek/pull/1280) - Replacing link to `SentieonDNAscopeModel1.1.model` in Sentieon's S3 with link to same file in igenomes' S3 +- [#1303](https://github.com/nf-core/sarek/pull/1303) - Resurrect vep_version params and changed its scope to pipeline to enable usage for vep loftee plugin +- [#1304](https://github.com/nf-core/sarek/pull/1304) - Update modules +- [#1311](https://github.com/nf-core/sarek/pull/1311) - Update local modules with an `environment.yml` file +- [#1317](https://github.com/nf-core/sarek/pull/1317) - Add new tools to subway map +- [#1325](https://github.com/nf-core/sarek/pull/1325) - Move `sentieon_dnascope_model` params into `igenomes.config` +- [#1325](https://github.com/nf-core/sarek/pull/1325) - Refactor config files +- [#1327](https://github.com/nf-core/sarek/pull/1327) - Update modules to have a conda environment name + +### Fixed + +- [#1277](https://github.com/nf-core/sarek/pull/1277) - Fix null value issue for Mutect2 joint calling +- [#1287](https://github.com/nf-core/sarek/pull/1287) - Adding label `process_single` to local modules +- 
[#1298](https://github.com/nf-core/sarek/pull/1298) - Fix annotation cache usage +- [#1301](https://github.com/nf-core/sarek/pull/1301) - Fix nf-prov usage +- [#1315](https://github.com/nf-core/sarek/pull/1315) - Avoid clash of configs of `FILTERVARIANTTRANCHES` in the Sentieon-Haplotyper and GATK-Haplotypecaller subworkflows +- [#1318](https://github.com/nf-core/sarek/pull/1318) - Fix writing of params.json on S3 +- [#1324](https://github.com/nf-core/sarek/pull/1324) - Fix various typos & code formatting +- [#1325](https://github.com/nf-core/sarek/pull/1325) - Update bcfannotate tests and related config files +- [#1328](https://github.com/nf-core/sarek/pull/1328) - Fix links to docs in `nextflow_schema.json` and `docs/output.md` +- [#1328](https://github.com/nf-core/sarek/pull/1328) - Add missing icons in `nextflow_schema.json` +- [#1330](https://github.com/nf-core/sarek/pull/1330) - Add SnpEff to full sized tests + +### Removed + +- [#1298](https://github.com/nf-core/sarek/pull/1298) - Remove `--use_annotation_cache_keys` params + +### Dependencies + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `fastqc` | 0.11.9 | 0.12.1 | +| `multiqc` | 1.15 | 1.17 | + +### Modules / Subworkflows + +| script | Old name | New name | +| ----------------------------- | ----------------------------- | ----------------------------- | +| `gatk4spark/applybqsr` | `GATK4_APPLYBQSRSPARK` | `GATK4SPARK_APPLYBQSR` | +| `gatk4spark/baserecalibrator` | `GATK4_BASERECALIBRATORSPARK` | `GATK4SPARK_BASERECALIBRATOR` | +| `gatk4spark/markduplicates` | `GATK4_MARKDUPLICATESSPARK` | `GATK4SPARK_MARKDUPLICATES` | + +## [3.3.2](https://github.com/nf-core/sarek/releases/tag/3.3.2) - Ráhpajávvre + +Ráhpajávvre is the Lule Sámi spelling of Rapaselet. 
+ +### Added + +- [#1246](https://github.com/nf-core/sarek/pull/1246) - Back to dev +- [#1259](https://github.com/nf-core/sarek/pull/1259) - nf-prov plugin +- [#1288](https://github.com/nf-core/sarek/pull/1288) - Add nf-test continuous integration + +### Changed + +- [#1248](https://github.com/nf-core/sarek/pull/1248) - Improve annotation-cache docs +- [#1261](https://github.com/nf-core/sarek/pull/1261) - Enable cache for annotation generation when using 'merge' + +### Fixed + +- [#1247](https://github.com/nf-core/sarek/pull/1247) - FIX: Result paths for full size test to be correctly displayed on the website +- [#1256](https://github.com/nf-core/sarek/pull/1256) - Fix issue with controlfreec container declaration +- [#1270](https://github.com/nf-core/sarek/pull/1270) - Revert controlfreec/assesssignificance module to 11.6 + +### Dependencies + +| Dependency | Old version | New version | +| ---------------------------------- | ----------- | ----------- | +| `Control-FREEC/assesssignificance` | 11.6b | 11.6 | + +## [3.3.1](https://github.com/nf-core/sarek/releases/tag/3.3.1) - Biellorippjávrre + +A lake near the Rapaselet delta. 
+ +### Added + +- [#1231](https://github.com/nf-core/sarek/pull/1231) - Back to dev + +### Changed + +- [#1242](https://github.com/nf-core/sarek/pull/1242) - Simplify sentieon nf-core test license usage +- [#1243](https://github.com/nf-core/sarek/pull/1243) - Improve json schema usage for input + +### Fixed + +- [#1232](https://github.com/nf-core/sarek/pull/1232) - Fix Zenodo IDs in manifest +- [#1236](https://github.com/nf-core/sarek/pull/1236) - Fix annotation cache folder verification when no annotation +- [#1240](https://github.com/nf-core/sarek/pull/1240) - Disable JVM Hotspot in all modules/gatk4 ([#1030](https://github.com/nf-core/sarek/issues/1030)) +- [#1241](https://github.com/nf-core/sarek/pull/1241) - Fix axis text of controlfreec plots closing [#921](https://github.com/nf-core/sarek/issues/921) + +### Dependencies + +| Dependency | Old version | New version | +| --------------- | ----------- | ----------- | +| `Control-FREEC` | 11.6 | 11.6b | + +## [3.3.0](https://github.com/nf-core/sarek/releases/tag/3.3.0) - Rapaselet + +Rapaselet is a delta formed by the Rapaätno river between the Bielloriehppe massif (formerly written Piellorieppe) and the Skårki massif. 
+ +### Added + +- [#930](https://github.com/nf-core/sarek/pull/930) - Add more manual tests +- [#1130](https://github.com/nf-core/sarek/pull/1130) - Back to dev +- [#1013](https://github.com/nf-core/sarek/pull/1013) - Mutect2 multi sample mode with `--joint_mutect2` +- [#1153](https://github.com/nf-core/sarek/pull/1153) - Add input validation for Sentieon & FGBio UMI incompatibility +- [#1158](https://github.com/nf-core/sarek/pull/1158) - Add preprint +- [#1159](https://github.com/nf-core/sarek/pull/1159) - ISMB Poster +- [#1173](https://github.com/nf-core/sarek/pull/1173) - CI tests for VQSR track with stub runs +- [#1122](https://github.com/nf-core/sarek/pull/1122), [#1196](https://github.com/nf-core/sarek/pull/1196) - Add `annotation cache` functionality +- [#1184](https://github.com/nf-core/sarek/pull/1184) - Stub-based CI-test of Sentieon joint-germline variant-calling with VQSR + +### Changed + +- [#1151](https://github.com/nf-core/sarek/pull/1151) - Refactor codebase +- [#1157](https://github.com/nf-core/sarek/pull/1157) - Move all vep args from `ext.args` to `params.vep_custom_args` to allow easier modifications +- [#1059](https://github.com/nf-core/sarek/pull/1059) - Add `nf-validation` for samplesheet validation +- [#1160](https://github.com/nf-core/sarek/pull/1160) - Updating tiddit to v3.6.1 +- [#1166](https://github.com/nf-core/sarek/pull/1166) - More info about `--tools` +- [#1173](https://github.com/nf-core/sarek/pull/1173) - Refactor single sample filtering of Haplotypecaller generated VCFs ([#1053](https://github.com/nf-core/sarek/pull/1053)) +- [#1174](https://github.com/nf-core/sarek/pull/1174) - Updating multiqc to v1.15 +- [#1179](https://github.com/nf-core/sarek/pull/1179) - Unhide params `trim_fastq`, `umi_read_structure`, and `aligner` +- [#1180](https://github.com/nf-core/sarek/pull/1180) - Updating the nf-core modules +- [#1198](https://github.com/nf-core/sarek/pull/1198) - Prepare release `3.3.0` +- 
[#1200](https://github.com/nf-core/sarek/pull/1200) - Streamline Github Actions workflows +- [#1212](https://github.com/nf-core/sarek/pull/1212) - Use matrix for AWS megatests +- [#1218](https://github.com/nf-core/sarek/pull/1218) - Remove Singularity tests for GHA +- [#1227](https://github.com/nf-core/sarek/pull/1227) - Update modules + +### Fixed + +- [#1143](https://github.com/nf-core/sarek/pull/1143) - `snpeff_db` is now a string +- [#1145](https://github.com/nf-core/sarek/pull/1145) - Fixed Zenodo links in `README.md` and in `WorkflowMain.groovy` +- [#1149](https://github.com/nf-core/sarek/pull/1149) - Update `Manta` modules and fix usage of `--exome` flag +- [#1155](https://github.com/nf-core/sarek/pull/1155) - Restore proper rendering in `usage.md` +- [#1163](https://github.com/nf-core/sarek/pull/1163) - Correcting location of output folder for joint variant calling with GATK's haplotypecaller +- [#1169](https://github.com/nf-core/sarek/pull/1169) - Updating Sentieon-modules. (The conda-check in the Sentieon-modules was moved to the script-section. The version of Sentieon remain unchanged.) 
+- [#1171](https://github.com/nf-core/sarek/pull/1171) - Fix channel logic for germline resource to skip GetPileupSummary if not provided +- [#1172](https://github.com/nf-core/sarek/pull/1172) - Publish gvcf files when all intervals are processed at once ([#764](https://github.com/nf-core/sarek/issues/764)) +- [#1173](https://github.com/nf-core/sarek/pull/1173) - Fixed duplicated entries in joint germline recalibrated VCF ([#966](https://github.com/nf-core/sarek/pull/966), [#1102](https://github.com/nf-core/sarek/pull/1102)), + fixed grouping joint germline recalibrated VCF ([#1137](https://github.com/nf-core/sarek/pull/1137)) +- [#1177](https://github.com/nf-core/sarek/pull/1177) - Fix status inference when using nf-validation plugin +- [#1181](https://github.com/nf-core/sarek/pull/1181) - Fix join mismatch error in Mutect2 tumor only subworkflow +- [#1183](https://github.com/nf-core/sarek/pull/1183) - Add docs for concatenated germline variants +- [#1184](https://github.com/nf-core/sarek/pull/1184) - Fix issue with duplicated variants in VCF from Sentieon-based joint-germline variant-calling with VQSR. (Corresponding to [#966](https://github.com/nf-core/sarek/issues/966) for GATK.) 
+- [#1192](https://github.com/nf-core/sarek/pull/1192) - Add `ASCATprofile.png` to ASCAT output docs +- [#1197](https://github.com/nf-core/sarek/pull/1197) - Improve `tower.yml` file to display reports in Tower ([#1190](https://github.com/nf-core/sarek/issues/1190)) +- [#1202](https://github.com/nf-core/sarek/pull/1202) - Remove GHA step that caches Nextflow and bump other out of date actions +- [#1203](https://github.com/nf-core/sarek/pull/1203) - Fix issue with Singularity containers on test profiles +- [#1204](https://github.com/nf-core/sarek/pull/1204) - Fix issue with nf-validation: lane can be a requirement of bam too now +- [#1205](https://github.com/nf-core/sarek/pull/1205) - Less tests triggered +- [#1214](https://github.com/nf-core/sarek/pull/1214) - Don't pass in intervals file to ControlFREEC for WGS analysis +- [#1215](https://github.com/nf-core/sarek/pull/1215) - Fix `meta.id` for mutect2 tumor_only subworkflows +- [#1216](https://github.com/nf-core/sarek/pull/1216) - Better test coverage for variant calling `*_all` subworkflows +- [#1217](https://github.com/nf-core/sarek/pull/1217) - Fix `groupTuple` statement for mutect2 tumor_only subworkflows +- [#1220](https://github.com/nf-core/sarek/pull/1220) - Fix channel and meta logic for `joint_mutect2` feature +- [#1221](https://github.com/nf-core/sarek/pull/1221) - Remove `lane` meta field after samplesheet validation to ensure proper merging after mapping +- [#1222](https://github.com/nf-core/sarek/pull/1222) - Better documentation for annotation cache +- [#1224](https://github.com/nf-core/sarek/pull/1224) - Update BCFTOOLS_SORT module with `--temp-dir .` added as option, which was required for Singularity +- [#1225](https://github.com/nf-core/sarek/pull/1225) - Better test coverage for all tests +- [#1227](https://github.com/nf-core/sarek/pull/1227) - Lint warning fix +- [#1229](https://github.com/nf-core/sarek/pull/1229) - Fix md5sum for gatk4_spark tests +- 
[#1230](https://github.com/nf-core/sarek/pull/1230) - Fix md5sum for sentieon aligner tests + +### Dependencies + +| Dependency | Old version | New version | +| ------------- | ------------------------- | ------------------------ | +| `cnvkit` | 0.9.9 (`samtools` 1.16.1) | 0.9.10 (`samtools` 1.17) | +| `ensembl-vep` | 108 | 110 | +| `grep` | 3.4 | 3.11 | +| `multiqc` | 1.14 | 1.15 | +| `tiddit` | 3.3.2 | 3.6.1 | + +## [3.2.3](https://github.com/nf-core/sarek/releases/tag/3.2.3) - Gällivare + +Gällivare is a small lake next to Pierikjaure. + +### Added + +- [#1112](https://github.com/nf-core/sarek/pull/1112) - Back to dev +- [#1119](https://github.com/nf-core/sarek/pull/1119) - Added `help_text` for `input_output_options` group in schema +- [#1044](https://github.com/nf-core/sarek/pull/1044) - Adding support for several tools from Sentieon's DNAseq package. The standard fastq-to-vcf processing can now be done using Sentieon's DNAseq tools `ApplyVarCal`, `bwa mem`, `Dedup`, `GVCFtyper`, `Haplotyper`, `LocusCollector` and `VarCal` + +### Changed + +- [#1119](https://github.com/nf-core/sarek/pull/1119) - Remove `null` by default in schema +- [#1128](https://github.com/nf-core/sarek/pull/1128) - Prepare release `3.2.3` + +### Fixed + +- [#1118](https://github.com/nf-core/sarek/pull/1118) - Remove `public_aws_ecr` profile + +## [3.2.2](https://github.com/nf-core/sarek/releases/tag/3.2.2) - Vuoinesluobbalah + +Vuoinesluobbalah is a lake close to Bierikjávrre. 
+ +### Added + +- [#1106](https://github.com/nf-core/sarek/pull/1106) - Add Slack integration to Megatests +- [#1107](https://github.com/nf-core/sarek/pull/1107) - Add `singularity.registry` to `public_aws_ecr` + +### Changed + +- [#1087](https://github.com/nf-core/sarek/pull/1087) - Back to dev +- [#1087](https://github.com/nf-core/sarek/pull/1087) - Minor modules update +- [#1088](https://github.com/nf-core/sarek/pull/1088) - Replace profile `test` by `test_cache` and add a `test` profile without hidden files +- [#1095](https://github.com/nf-core/sarek/pull/1095) - Prepare release `3.2.2` + +### Fixed + +- [#1087](https://github.com/nf-core/sarek/pull/1087) - Fix wrong default memory in `GATK4_CREATESEQUENCEDICTIONARY` [#1085](https://github.com/nf-core/sarek/pull/1085) +- [#1089](https://github.com/nf-core/sarek/pull/1089) - Remove duplicated code +- [#1093](https://github.com/nf-core/sarek/pull/1093) - Fixing Ascat by reverting meta.id in channels allele_files, loci_files, gc_file and rt_file to baseName +- [#1098](https://github.com/nf-core/sarek/pull/1098) - Fix Channel issue in Mutect2 subworkflow [#1094](https://github.com/nf-core/sarek/pull/1094) +- [#1100](https://github.com/nf-core/sarek/pull/1100) - Remove duplicate index with deepvariant when no_intervals [#1069](https://github.com/nf-core/sarek/pull/1069) +- [#1101](https://github.com/nf-core/sarek/pull/1101) - Remove duplicate index computation for GATK4 Markduplicates & [#1065](https://github.com/nf-core/sarek/issues/1065) +- [#1101](https://github.com/nf-core/sarek/pull/1101) - Fix GATK4 version for GATK4 MarkduplicatesSpark [#1068](https://github.com/nf-core/sarek/issues/1068) +- [#1105](https://github.com/nf-core/sarek/pull/1105) - Remove `params.tracedir` +- [#1108](https://github.com/nf-core/sarek/pull/1108) - Refactor bad prefix definition for vcf files [#938](https://github.com/nf-core/sarek/issues/938) +- [#1109](https://github.com/nf-core/sarek/pull/1109) - Fix `mpileup` for variantcalling: 
only `bcftools` run and file publishing + +## [3.2.1](https://github.com/nf-core/sarek/releases/tag/3.2.1) - Pierikjaure + +Pierikjaure is a previous spelling of Bierikjávrre. + +### Changed + +- [#1073](https://github.com/nf-core/sarek/pull/1073) - Back to dev +- [#1080](https://github.com/nf-core/sarek/pull/1080) - Prepare release `3.2.1` +- [#1082](https://github.com/nf-core/sarek/pull/1082) - Bump minimal Nextflow version to 23.04.0 + +### Fixed + +- [#1078](https://github.com/nf-core/sarek/pull/1078) - Update tabix/bgziptabix module to fix typo +- [#1079](https://github.com/nf-core/sarek/pull/1079) - Fixed typo in profile name for tower aws megatests +- [#1082](https://github.com/nf-core/sarek/pull/1082) - Patch more modules to use quay.io registry +- [#1082](https://github.com/nf-core/sarek/pull/1082) - Update `public_aws_ecr` profile +- [#1082](https://github.com/nf-core/sarek/pull/1082) - Add quay.io as singularity default registry + +## [3.2.0](https://github.com/nf-core/sarek/releases/tag/3.2.0) - Bierikjávrre + +Bierikjávrre is one of the largest lakes in Sarek. + +### Added + +- [#864](https://github.com/nf-core/sarek/pull/864) - Added possibilities to export assembled haplotypes and locally realigned reads +- [#792](https://github.com/nf-core/sarek/pull/792) - Added the option `--concatenate_vcfs` for concatenating the germline VCF files. 
Per default, the resulting vcf-files will be placed under `/variant_calling/concat` +- [#889](https://github.com/nf-core/sarek/pull/889) - Added possibilities to skip variant filtering after Haplotypecaller +- [#945](https://github.com/nf-core/sarek/pull/945) - Adding Adam Talbot to contributor list +- [#954](https://github.com/nf-core/sarek/pull/954) - Adding keys for annotation with snpeff and ensemblvep for `hg19`, `hg38` and `mm10` +- [#967](https://github.com/nf-core/sarek/pull/967) - Adding new `outdir_cache` params +- [#971](https://github.com/nf-core/sarek/pull/971) - Subtle bugfix to correct mutation of FASTP output channel objects +- [#978](https://github.com/nf-core/sarek/pull/978) - Validate that patient/sample does not contain spaces +- [#981](https://github.com/nf-core/sarek/pull/981) - Added documentation on generating ASCAT resources for exome and targeted sequencing +- [#1041](https://github.com/nf-core/sarek/pull/1041) - Add params `vep_custom_args` to let user specify custom params more easily for `VEP` +- [#1045](https://github.com/nf-core/sarek/pull/1045) - Add `public_aws_ecr` for using ECR hosted containers + +### Changed + +- [#859](https://github.com/nf-core/sarek/pull/859) - Back to dev +- [#860](https://github.com/nf-core/sarek/pull/860) - Replace local subworkflow with nf-core version - `vcf_annotate_snpeff` +- [#865](https://github.com/nf-core/sarek/pull/865) - Replace local subworkflow with nf-core version - `vcf_annotate_ensemblvep` +- [#874](https://github.com/nf-core/sarek/pull/874) - Update all modules +- [#882](https://github.com/nf-core/sarek/pull/882) - Remove exit strategy for `Manta`/`Strelka` +- [#890](https://github.com/nf-core/sarek/pull/890) - Sync `TEMPLATE` with `tools` `2.7.1` +- [#896](https://github.com/nf-core/sarek/pull/896) - Code refactoring +- [#898](https://github.com/nf-core/sarek/pull/898) - Nextflow minimal version is now `22.10.1` +- [#898](https://github.com/nf-core/sarek/pull/898) - Sync `TEMPLATE` with 
`tools` `2.7.2` +- [#909](https://github.com/nf-core/sarek/pull/909) - Cache test data on GHA +- [#928](https://github.com/nf-core/sarek/pull/928) - No need for BAI when starting from uBAM +- [#935](https://github.com/nf-core/sarek/pull/935) - Add params `build_only_index` to only build index +- [#936](https://github.com/nf-core/sarek/pull/936) - Add params `download_cache` to download annotation cache +- [#942](https://github.com/nf-core/sarek/pull/942) - Update `README.md` +- [#967](https://github.com/nf-core/sarek/pull/967) - Update and detail extensively how to use annotation cache +- [#968](https://github.com/nf-core/sarek/pull/968) - Update all modules +- [#1011](https://github.com/nf-core/sarek/pull/1011) - Sync `TEMPLATE` with `tools` `2.8` +- [#1012](https://github.com/nf-core/sarek/pull/1012) - Better handling of meta maps in `bam_variant_calling_somatic_mutect2` +- [#1014](https://github.com/nf-core/sarek/pull/1014) - `snpeff_db` is now only the `db` version and not `genome.db` +- [#1015](https://github.com/nf-core/sarek/pull/1015) - Increase default value for `--nucleotides_per_second` to `200000` resulting in 21 groups for `GATK.GRCh38` +- [#1019](https://github.com/nf-core/sarek/pull/1019) - Set a default registry outside of profile scope +- [#1031](https://github.com/nf-core/sarek/pull/1031) - Update pipeline summary +- [#1032](https://github.com/nf-core/sarek/pull/1032) - Update all modules +- [#1051](https://github.com/nf-core/sarek/pull/1051) - Update more modules +- [#1056](https://github.com/nf-core/sarek/pull/1056) - Bump pipeline version to `3.2.0` + +### Fixed + +- [#870](https://github.com/nf-core/sarek/pull/870) - Fix output for locally realigned reads from haplotypecaller +- [#874](https://github.com/nf-core/sarek/pull/874) - Remove `CITATION.cff` +- [#893](https://github.com/nf-core/sarek/pull/893) - Fix logic of when to execute tabix on dbsnp +- [#894](https://github.com/nf-core/sarek/pull/894) - Add description to `--cnvkit_reference` 
+- [#894](https://github.com/nf-core/sarek/pull/894) - Remove methods description TODO prompt +- [#927](https://github.com/nf-core/sarek/pull/927) - Fix tumor only variant calling issues with freebayes following [#896](https://github.com/nf-core/sarek/pull/896) +- [#928](https://github.com/nf-core/sarek/pull/928) - Fix [#700](https://github.com/nf-core/sarek/issues/700) +- [#929](https://github.com/nf-core/sarek/pull/929) - Fix somatic variant calling issues with msisensor following [#896](https://github.com/nf-core/sarek/pull/896) +- [#941](https://github.com/nf-core/sarek/pull/941) - Fix json validation for `tools`, `skip_tools` and `use_gatk_spark` [#892](https://github.com/nf-core/sarek/issues/892) +- [#954](https://github.com/nf-core/sarek/pull/954) - Fix missing annotation keys with `snpeff` and `ensemblvep` for `hg19` +- [#957](https://github.com/nf-core/sarek/pull/957) - Add `failOnDuplicate` and `failOnMismatch` options to all `join()` operator where it was possible +- [#982](https://github.com/nf-core/sarek/pull/982) - Remove usage of exit statements, using `Nextflow.error` instead +- [#985](https://github.com/nf-core/sarek/pull/985) - Cache correctly identifies when it needs to be updated +- [#988](https://github.com/nf-core/sarek/pull/988) - Updated ascat module to fix seed for reproducibility +- [#998](https://github.com/nf-core/sarek/pull/998) - Remove parallelization within a sample for `Manta` +- [#1014](https://github.com/nf-core/sarek/pull/1014) - Fix calls to `ensemblvep` and `snpeff` containers +- [#1022](https://github.com/nf-core/sarek/pull/1022) - Fix call to variantrecalibrator. (Making sure that dbsnp_vqsr, known_indels_vqsr and known_snps_vqsr are channels, and not strings.) 
+- [#1039](https://github.com/nf-core/sarek/pull/1039) - Remove concatenate_vcfs tests with singularity, as they are failing due to not enough space on GHA runners +- [#1040](https://github.com/nf-core/sarek/pull/1040) - Fix dict channel issue due to [#1032](https://github.com/nf-core/sarek/pull/1032) +- [#1043](https://github.com/nf-core/sarek/pull/1043) - Fix typo in the tags.yml files from [#978](https://github.com/nf-core/sarek/pull/978) +- [#1048](https://github.com/nf-core/sarek/pull/1048) - Skip tool validation on annotation to fix [#949](https://github.com/nf-core/sarek/issues/949), check that bam is bam and cram is cram [#895](https://github.com/nf-core/sarek/issues/895) +- [#1050](https://github.com/nf-core/sarek/pull/1050) - Disable GATK VCF filters when joint calling to fix [#1025](https://github.com/nf-core/sarek/issues/1025) +- [#1055](https://github.com/nf-core/sarek/pull/1055) - Fix pattern for fasta file in the json schema +- [#1058](https://github.com/nf-core/sarek/pull/1058) - Fix container declaration for VCFTOOLS as it has been updated in the registry +- [#1061](https://github.com/nf-core/sarek/pull/1061) - Fix GenomicsDB also works with one interval file, fix results publishing of GenomicsDB +- [#1062](https://github.com/nf-core/sarek/pull/1062) - Fix automatic restart from steps +- [#1063](https://github.com/nf-core/sarek/pull/1063) - Fix join duplication for manta/strelka + +### Removed + +- [#898](https://github.com/nf-core/sarek/pull/898) - Params `enable_conda` was removed +- [#1070](https://github.com/nf-core/sarek/pull/1070) - Remove Sarek version from workflow and subway map pictures + +### Dependencies + +| Dependency | Old version | New version | +| ------------- | ----------- | ----------- | +| `ascat` | 3.0.0 | 3.1.1 | +| `bcftools` | 1.15.1 | 1.17 | +| `deepvariant` | 1.4.0 | 1.5.0 | +| `ensembl-vep` | 106.1 | 108.2 | +| `fastp` | 0.23.2 | 0.23.4 | +| `multiqc` | 1.13a | 1.14 | +| `samtools` | 1.16 | 1.17 | +| `svdb` | 2.6.1 | 
2.8.1 | + +### Modules / Subworkflows + +| script | Old name | New name | +| --------------------- | ------------ | --------------------- | +| `ensemblvep/download` | | 'ENSEMBLVEP_DOWNLOAD' | +| `ensemblvep/vep` | 'ENSEMBLVEP' | 'ENSEMBLVEP_VEP' | +| `snpeff/download` | | 'SNPEFF_DOWNLOAD' | +| `snpeff/snpeff` | 'SNPEFF' | 'SNPEFF_SNPEFF' | + +## [3.1.2](https://github.com/nf-core/sarek/releases/tag/3.1.2) - Lesser Lule River + +Lesser Lule River is English for Lilla Luleälven + +### Added + +### Changed + +### Fixed + +- [#906](https://github.com/nf-core/sarek/pull/906) - Remove usages of deprecated `Channel.from` method + +### Deprecated + +### Removed + +### Dependencies + +## [3.1.1](https://github.com/nf-core/sarek/releases/tag/3.1.1) - Lilla Luleälven + +Lilla Luleälven river's main affluent is Rapaätno. + +### Added + +- [#856](https://github.com/nf-core/sarek/pull/856) - Add annotation for `R64-1-1` and `UMD3.1` + +### Changed + +- [#855](https://github.com/nf-core/sarek/pull/855) - Speed up duplicate marking by using `samtools` for CRAM conversion +- [#858](https://github.com/nf-core/sarek/pull/858) - Prepare release `3.1.1` + +### Fixed + +- [#851](https://github.com/nf-core/sarek/pull/851) - Fix `schema` definition `None` for `cf_chrom_len` + +### Deprecated + +### Removed + +### Dependencies + +## [3.1](https://github.com/nf-core/sarek/releases/tag/3.1) - Rapaätno + +Rapaätno is the river you can see from the Skierfe mountain. 
+ +### Added + +- [#735](https://github.com/nf-core/sarek/pull/735) - GATK Markduplicates now natively supports CRAM output +- [#774](https://github.com/nf-core/sarek/pull/774) - Add logo for Danish National Genome Center +- [#783](https://github.com/nf-core/sarek/pull/783) - Add paths for chr length used by controlfreec to GRCh38 config +- [#820](https://github.com/nf-core/sarek/pull/820) - Improve documentation on scatter/gather effects +- [#833](https://github.com/nf-core/sarek/pull/833) - Add name to CI tests to avoid confusion between runs + +### Changed + +- [#735](https://github.com/nf-core/sarek/pull/735) - `--save_mapped` now saves mapping output in CRAM format +- [#762](https://github.com/nf-core/sarek/pull/762) - Back to dev +- [#762](https://github.com/nf-core/sarek/pull/762) - Update deepvariant module +- [#773](https://github.com/nf-core/sarek/pull/773) - Sync `TEMPLATE` with `tools` `2.6` +- [#782](https://github.com/nf-core/sarek/pull/782) - Reduce scatter/gather for full size tests on AWS +- [#785](https://github.com/nf-core/sarek/pull/785) - Update description of `bcftools stats` +- [#784](https://github.com/nf-core/sarek/pull/784) - Update all subworkflows names thanks to @scorreard +- [#806](https://github.com/nf-core/sarek/pull/806) - Refactor all tests +- [#806](https://github.com/nf-core/sarek/pull/806) - Split up `modules.config` file +- [#810](https://github.com/nf-core/sarek/pull/810) - Update CHANGELOG +- [#821](https://github.com/nf-core/sarek/pull/821) - Change `replace` to `putIfAbsent` for automatic search of `input` if none is provided to avoid overwriting values +- [#822](https://github.com/nf-core/sarek/pull/822) - Update modules with `nf-core modules update -a`: Update GATK version to 4.3.0 +- [#827](https://github.com/nf-core/sarek/pull/827) - Add `--genomicsdb-shared-posixfs-optimizations true --bypass-feature-reader` to `GenomicsDB` parameters to speed up the analysis +- [#842](https://github.com/nf-core/sarek/pull/842) - 
Increase default memory for samtools stats +- [#844](https://github.com/nf-core/sarek/pull/844) - All small scale tests are run on PR to `master` + +### Fixed + +- [#762](https://github.com/nf-core/sarek/pull/762) - Polish CHANGELOG + figures +- [#766](https://github.com/nf-core/sarek/pull/766) - Align box description in subway map +- [#768](https://github.com/nf-core/sarek/pull/768) - Use double quotes to fix import of singularity images for deepvariant module +- [#770](https://github.com/nf-core/sarek/pull/770) - Use double quotes to fix import of singularity images for gatk4/cnnscorevariants module +- [#771](https://github.com/nf-core/sarek/pull/771) - update to new modules syntax +- [#777](https://github.com/nf-core/sarek/pull/777) - Fix mixed up aws full size tests output paths +- [#790](https://github.com/nf-core/sarek/pull/790) - Fix issue [#789](https://github.com/nf-core/sarek/issues/789) somatic mutect2 test +- [#793](https://github.com/nf-core/sarek/pull/793) - Remove DeepVariant GVCF from annotation +- [#794](https://github.com/nf-core/sarek/pull/794) - Fix publishing for unzipped reference files +- [#807](https://github.com/nf-core/sarek/pull/807) - Fix read group when uBAMs are provided (see issue [#732](https://github.com/nf-core/sarek/issues/732)) +- [#813](https://github.com/nf-core/sarek/pull/813) - Fix input validation when launching from website (see issue [#694](https://github.com/nf-core/sarek/issues/694)) +- [#814](https://github.com/nf-core/sarek/pull/814) - Fix readgroups when using DragMap together with FreeBayes or Mutect2 (see issue [#780](https://github.com/nf-core/sarek/issues/780)) +- [#817](https://github.com/nf-core/sarek/pull/817) - Fix CNVKit run on tumor-only sample to be run on all samples +- [#828](https://github.com/nf-core/sarek/pull/828) - Fix issue [#763](https://github.com/nf-core/sarek/issues/763) to run variantcalling when starting from step recalibration +- [#837](https://github.com/nf-core/sarek/pull/837) - Fix 
Freebayes config selector after subworkflow renaming +- [#839](https://github.com/nf-core/sarek/pull/839) - Remove `copyTo` method that fails on S3 when the source and destination buckets are in different regions +- [#841](https://github.com/nf-core/sarek/pull/841) - Fix path priority for `cf_chrom_len` + +### Deprecated + +### Removed + +### Dependencies + +| Dependency | Old version | New version | +| ------------- | ----------- | ----------- | +| `bcftools` | 1.15.1 | 1.16 | +| `deepvariant` | 1.3.0 | 1.4.0 | +| `freebayes` | 1.3.5 | 1.3.6 | +| `gatk4` | 4.2.6.1 | 4.3.0.0 | +| `samtools` | 1.15.1 | 1.16.1 | +| `tiddit` | 3.1.0 | 3.3.2 | + +## [3.0.2](https://github.com/nf-core/sarek/releases/tag/3.0.2) - Lájtávrre + +Lájtávrre is a lake you can see from the Skierfe mountain, formed by the Rapaätno river. + +### Added + +- [#691](https://github.com/nf-core/sarek/pull/691) - Enable `PROFILE=conda`, `PROFILE=docker` and `PROFILE=singularity` for pytest +- [#716](https://github.com/nf-core/sarek/pull/716) - Add documentation for Azure recommended config vm_size +- [#752](https://github.com/nf-core/sarek/pull/752) - Add tracking of all dependencies starting 3.0 + +### Changed + +- [#679](https://github.com/nf-core/sarek/pull/679) - Back to `dev` +- [#685](https://github.com/nf-core/sarek/pull/685) - Updating the nf-core modules used by Sarek +- [#691](https://github.com/nf-core/sarek/pull/691) - To run the same pytest as before locally, use `PROFILE=docker` +- [#692](https://github.com/nf-core/sarek/pull/692) - Use `params.tools=strelka` in profile `test` +- [#696](https://github.com/nf-core/sarek/pull/696) - Adding check of md5-sums in CI-tests +- [#719](https://github.com/nf-core/sarek/pull/719) - Added boxes to subway map +- [#720](https://github.com/nf-core/sarek/pull/720) - Sync `TEMPLATE` with `tools` `2.5` +- [#723](https://github.com/nf-core/sarek/pull/723) - Sync `TEMPLATE` with `tools` `2.5.1` +- [#726](https://github.com/nf-core/sarek/pull/726) - Adapt 
resource requests +- [#730](https://github.com/nf-core/sarek/pull/730) - Reduce number of tests +- [#731](https://github.com/nf-core/sarek/pull/731) - Run the somatic test as default on `-profile test_full`, the germline can be tested with `-profile test_full_germline` +- [#733](https://github.com/nf-core/sarek/pull/733) - Add description for params.cf_chrom_len +- [#734](https://github.com/nf-core/sarek/pull/734) - nf-core modules update -a +- [#736](https://github.com/nf-core/sarek/pull/736) - More extensive CI for default test +- [#742](https://github.com/nf-core/sarek/pull/742) - Requiring the Haplotypecaller to be specified as one of the tools for joint germline genotyping +- [#752](https://github.com/nf-core/sarek/pull/752) - Code polishing + +### Fixed + +- [#679](https://github.com/nf-core/sarek/pull/679) - Fixed typos in subway maps +- [#681](https://github.com/nf-core/sarek/pull/681) - Fixed intermediate files published cf [#680](https://github.com/nf-core/sarek/issues/680) +- [#688](https://github.com/nf-core/sarek/pull/688) - Fixed VEP plugins issue cf [#687](https://github.com/nf-core/sarek/issues/687) +- [#689](https://github.com/nf-core/sarek/pull/689) - Fixed when clause for non `BWA mem` building mapping indexes +- [#704](https://github.com/nf-core/sarek/pull/704) - Fixed `cf_ploidy` to string instead of number +- [#705](https://github.com/nf-core/sarek/pull/705) - Fix publishing for processes in `alignment_to_fastq` subworkflow; prevent tabix computation for `known_snps` when present; publish `umi` processed files into `preprocessing/umi` subdirectory +- [#706](https://github.com/nf-core/sarek/pull/706) - Fixed `vep_version` not found error when running `--vep_loftee` +- [#724](https://github.com/nf-core/sarek/pull/724) - Fixed prettier issue +- [#727](https://github.com/nf-core/sarek/pull/727) - Allow `.list` interval files; remove `seconds` from GRCh38 file to allow `--nucleotides_per_second` to be used +- 
[#728](https://github.com/nf-core/sarek/pull/728) - Circumvent issue with controlfreec and length file containing regions not in intervals file +- [#729](https://github.com/nf-core/sarek/pull/729) - Trailing commas in `--tools`, `--skip_tools` and `--use_gatk_spark` now raise failure cf [#722](https://github.com/nf-core/sarek/issues/722) +- [#741](https://github.com/nf-core/sarek/pull/741) - Fix prefix for `bcftools sort` for joint germline variant calling +- [#743](https://github.com/nf-core/sarek/pull/743) - Remove profile definitions in profile to avoid issues with Tower +- [#758](https://github.com/nf-core/sarek/pull/758) - Fix Zenodo batch +- [#760](https://github.com/nf-core/sarek/pull/760) - Fix CHANGELOG dependencies +- [#761](https://github.com/nf-core/sarek/pull/761) - Fix font in subway map and workflow image + +### Deprecated + +### Removed + +- [#742](https://github.com/nf-core/sarek/pull/742) - Removed some lines from the usage-doc as Sarek no longer supports input supplied as a list of multiple csv-files +- [#757](https://github.com/nf-core/sarek/pull/757) - Remove `errorStrategy` in `conf/modules.config` + +## [3.0.1](https://github.com/nf-core/sarek/releases/tag/3.0.1) - Saiva + +Saiva is a lake in the Sarek national park, just below the Skierfe mountain. + +### Fixed + +- [#708](https://github.com/nf-core/sarek/pull/708) - Fixes mpileup bug. Update nf-core module `samtools/mpileup` to subset CRAM file by intervals + +## [3.0](https://github.com/nf-core/sarek/releases/tag/3.0) - Skierfe + +Skierfe is a mountain in the Sarek national park, and the inspiration for the logo. 
+ +### Added + +- [#388](https://github.com/nf-core/sarek/pull/388) - Add cram support + read splitting with `SeqKit` for speedup +- [#394](https://github.com/nf-core/sarek/pull/394) - Add `DeepVariant` +- [#411](https://github.com/nf-core/sarek/pull/411) - cram in csv samplesheet +- [#448](https://github.com/nf-core/sarek/pull/448) - Allow to skip base quality recalibration with `--skip_bqsr` +- [#449](https://github.com/nf-core/sarek/pull/449) - [@FriederikeHanssen](https://github.com/FriederikeHanssen) is now a `CODEOWNERS` +- [#460](https://github.com/nf-core/sarek/pull/460) - Add posters +- [#463](https://github.com/nf-core/sarek/pull/463) - Add dark/light logo versions +- [#464](https://github.com/nf-core/sarek/pull/464), [#514](https://github.com/nf-core/sarek/pull/514) - Add `DRAGMAP` as a possible aligner +- [#479](https://github.com/nf-core/sarek/pull/479) - Add more subworkflows +- [#485](https://github.com/nf-core/sarek/pull/485) - `--skip_qc`, `--skip_markduplicates` and `--skip_bqsr` is now `--skip_tools` +- [#507](https://github.com/nf-core/sarek/pull/507), [#537](https://github.com/nf-core/sarek/pull/537) - Subway map for building indexes +- [#512](https://github.com/nf-core/sarek/pull/512), [#531](https://github.com/nf-core/sarek/pull/531), [#537](https://github.com/nf-core/sarek/pull/537) - Subway map for pipeline +- [#522](https://github.com/nf-core/sarek/pull/522) - Add QC for vcf files & MultiQC +- [#533](https://github.com/nf-core/sarek/pull/533) - Add param `--only_paired_variant_calling` to allow skipping of germline variantcalling for paired samples +- [#536](https://github.com/nf-core/sarek/pull/536) - Add `--step markduplicates` to start from duplicate marking, `--step prepare_recalibration` now ONLY starts at process `BaseRecalibrator` & adding `bam` and `cram` input support for `--step` `markduplicates`, `prepare_recalibration`, `recalibrate`, and `variant_calling` +- [#538](https://github.com/nf-core/sarek/pull/538) - Add param 
`--seq_platform`, default: `ILLUMINA` +- [#545](https://github.com/nf-core/sarek/pull/545) - Add modules and subworkflows for `cnvkit` tumor_only mode +- [#540](https://github.com/nf-core/sarek/pull/540) - Add modules and subworkflows for `cnvkit` somatic mode +- [#557](https://github.com/nf-core/sarek/pull/557) - Add `Haplotypecaller` single sample mode together with `CNNScoreVariants` and `FilterVariantTranches` +- [#576](https://github.com/nf-core/sarek/pull/576) - Add modules and subworkflows for `cnvkit` germline mode +- [#582](https://github.com/nf-core/sarek/pull/582) - Added option `--vep_out_format` for setting the format of the output-file from VEP to `json`, `tab` or `vcf` (default) +- [#594](https://github.com/nf-core/sarek/pull/594) - Add parameter `--save_output_as_bam` to allow output of result files in BAM format +- [#595](https://github.com/nf-core/sarek/pull/595) - Added Haplotypecaller joint germline calling +- [#597](https://github.com/nf-core/sarek/pull/597) - Added tiddit for tumor variant calling +- [#600](https://github.com/nf-core/sarek/pull/600) - Added description for UMI related params in schema +- [#604](https://github.com/nf-core/sarek/pull/604), [#617](https://github.com/nf-core/sarek/pull/617) - Added full size tests WGS 30x NA12878 +- [#613](https://github.com/nf-core/sarek/pull/613) - Added params `--dbnsfp_fields` to allow configuration of fields for the `dbnsfp` `VEP` plugin +- [#613](https://github.com/nf-core/sarek/pull/613) - Added params `--dbnsfp_consequence` to allow configuration of consequence for the `dbnsfp` `VEP` plugin +- [#613](https://github.com/nf-core/sarek/pull/613) - Added params `--vep_version` to allow more configuration on the vep container definition +- [#620](https://github.com/nf-core/sarek/pull/620) - Added checks for sex information when running CNV tools +- [#623](https://github.com/nf-core/sarek/pull/623) - Additional checks of data in the input sample sheet +- 
[#629](https://github.com/nf-core/sarek/pull/629) - Added checks to catch inconsistency between supplied samples and requested tools +- [#632](https://github.com/nf-core/sarek/pull/632) - Added params `--snpeff_version` to allow more configuration on the snpeff container definition +- [#632](https://github.com/nf-core/sarek/pull/632) - Added params `--vep_include_fasta` to use the fasta file for annotation +- [#639](https://github.com/nf-core/sarek/pull/639) - Adding genes-txt-file and summary-html-file to the published output from snpEff +- [#647](https://github.com/nf-core/sarek/pull/647) - Update resource requests for preprocessing based on what worked for 5 ICGC matched WGS samples +- [#652](https://github.com/nf-core/sarek/pull/652) - Added full size somatic test profile + +### Changed + +- [#580](https://github.com/nf-core/sarek/pull/580) - changed the test_full config to real public WXS data. 1 sample WXS germline, 1 Tumor/Normal pair. https://doi.org/10.1038/sdata.2016.25 and https://doi.org/10.1038/s41587-021-00994-5 +- [#383](https://github.com/nf-core/sarek/pull/383), [#528](https://github.com/nf-core/sarek/pull/528) - Update `CHANGELOG` +- [#390](https://github.com/nf-core/sarek/pull/390) - Update `nextflow_schema.json` +- [#408](https://github.com/nf-core/sarek/pull/408) - Sync `TEMPLATE` with `tools` `2.0.1` +- [#416](https://github.com/nf-core/sarek/pull/416) - Sync `TEMPLATE` with `tools` `2.1` +- [#417](https://github.com/nf-core/sarek/pull/417) - Merge `dsl2` and `dev` branches +- [#419](https://github.com/nf-core/sarek/pull/419) - Improve preprocessing +- [#420](https://github.com/nf-core/sarek/pull/420), [#455](https://github.com/nf-core/sarek/pull/455), [#459](https://github.com/nf-core/sarek/pull/459), [#633](https://github.com/nf-core/sarek/pull/633) - `nf-core modules update --all` +- [#427](https://github.com/nf-core/sarek/pull/427) - Update `DeepVariant` +- [#462](https://github.com/nf-core/sarek/pull/462) - Update modules and 
`modules.config` +- [#465](https://github.com/nf-core/sarek/pull/465) - Improve `test_data.config` +- [#466](https://github.com/nf-core/sarek/pull/466), [#478](https://github.com/nf-core/sarek/pull/478), [#492](https://github.com/nf-core/sarek/pull/492), [#521](https://github.com/nf-core/sarek/pull/521) - Move some local modules to `nf-core/modules` +- [#466](https://github.com/nf-core/sarek/pull/466), [#485](https://github.com/nf-core/sarek/pull/485), [#492](https://github.com/nf-core/sarek/pull/492), [#494](https://github.com/nf-core/sarek/pull/494), [#515](https://github.com/nf-core/sarek/pull/515) - Improve preprocessing subworkflows +- [#474](https://github.com/nf-core/sarek/pull/474), [#475](https://github.com/nf-core/sarek/pull/475) - Sync `TEMPLATE` with `tools` `2.2` +- [#487](https://github.com/nf-core/sarek/pull/487), [#489](https://github.com/nf-core/sarek/pull/489), [#492](https://github.com/nf-core/sarek/pull/492), [#497](https://github.com/nf-core/sarek/pull/497), [#522](https://github.com/nf-core/sarek/pull/522), [#583](https://github.com/nf-core/sarek/pull/583) - Improve variant calling subworkflows +- [#498](https://github.com/nf-core/sarek/pull/498) - Update docs +- [#501](https://github.com/nf-core/sarek/pull/501) - Sync `TEMPLATE` with `tools` `2.3` +- [#511](https://github.com/nf-core/sarek/pull/511) - Sync `TEMPLATE` with `tools` `2.3.2` +- [#520](https://github.com/nf-core/sarek/pull/520) - Improve annotation subworkflows +- [#537](https://github.com/nf-core/sarek/pull/537) - Update workflow figure +- [#539](https://github.com/nf-core/sarek/pull/539) - Update `CITATIONS.md` +- [#544](https://github.com/nf-core/sarek/pull/544) - `Mutect2` is no longer compatible with `--no_intervals` +- [#551](https://github.com/nf-core/sarek/pull/551) - Sync `TEMPLATE` with `tools` `2.4` +- [#562](https://github.com/nf-core/sarek/pull/562) - Restart from `--step annotate` is now also requiring a CSV file +- [#563](https://github.com/nf-core/sarek/pull/563) - 
Updated subway map +- [#570](https://github.com/nf-core/sarek/pull/570) - Extract mpileup into its own subworkflow; zip mpileup files +- [#571](https://github.com/nf-core/sarek/pull/571) - Including and using GATK4's mergeVcfs +- [#572](https://github.com/nf-core/sarek/pull/572) - Adjusted subway map svg for firefox compatibility +- [#577](https://github.com/nf-core/sarek/pull/577) - Update `RELEASE_CHECKLIST` +- [#578](https://github.com/nf-core/sarek/pull/578) - Updated module deeptools/bamcoverage +- [#585](https://github.com/nf-core/sarek/pull/585) - Remove explicit BAM to CRAM conversion after MarkduplicatesSpark; tool does it internally +- [#581](https://github.com/nf-core/sarek/pull/581) - `TIDDIT` is updated to `3.1.0` +- [#593](https://github.com/nf-core/sarek/pull/593) - update `ensembl-vep` cache version and module +- [#600](https://github.com/nf-core/sarek/pull/600) - Remove `TODO` in awsfulltest +- [#606](https://github.com/nf-core/sarek/pull/606) - Updated `ASCAT` to version `3.0` as module +- [#608](https://github.com/nf-core/sarek/pull/608) - Prevent candidate VCFs from getting published in manta +- [#618](https://github.com/nf-core/sarek/pull/618) - Update `multiqc` module +- [#618](https://github.com/nf-core/sarek/pull/618) - Update test yml files +- [#620](https://github.com/nf-core/sarek/pull/620) - `gender` is now `sex` in the samplesheet +- [#630](https://github.com/nf-core/sarek/pull/630) - Update citations file +- [#632](https://github.com/nf-core/sarek/pull/632) - Update `snpEff` version to `5.1` and cache up to `105` +- [#632](https://github.com/nf-core/sarek/pull/632) - Update `VEP` version to `106.1` and cache up to `106` +- [#618](https://github.com/nf-core/sarek/pull/618) - Update `multiqc` module update test yml files +- [#618](https://github.com/nf-core/sarek/pull/618) - Update test yml files +- [#633](https://github.com/nf-core/sarek/pull/633) - Update `BCFTOOLS` version to `1.15.1` +- 
[#644](https://github.com/nf-core/sarek/pull/644) - Use `-Y` for `bwa-mem(2)` and remove `-M` +- [#645](https://github.com/nf-core/sarek/pull/645) - Merge `tests/nextflow.config` in `conf/test.config` +- [#646](https://github.com/nf-core/sarek/pull/646) - Update `nextflow_schema.json` to reflect new parameters and functions, removes `--annotation_cache`, removes `--ascat_chromosomes` +- [#649](https://github.com/nf-core/sarek/pull/649) - Update, simplify and add more files to all `test_*.yml` files +- [#651](https://github.com/nf-core/sarek/pull/651) - Added TIDDIT_SOMATIC subworkflow +- [#653](https://github.com/nf-core/sarek/pull/653) - Coherent results subfolder structure between preprocessing, variantcalling and reporting +- [#659](https://github.com/nf-core/sarek/pull/659) - Update usage.md docu section on `How to run ASCAT with WES` +- [#661](https://github.com/nf-core/sarek/pull/661) - Add cnvkit reference creation to index subway map +- [#662](https://github.com/nf-core/sarek/pull/662) - Add bgzipped and indexed GATKBundle reference files for `GATK.GRCh37` and replace germline-resources with GATKBundle one +- [#663](https://github.com/nf-core/sarek/pull/663) - Add separate parameters for `ASCAT` and `ControlFREEC` back in +- [#668](https://github.com/nf-core/sarek/pull/668) - Update annotation documentation +- [#674](https://github.com/nf-core/sarek/pull/674) - Default value for splitting is `50000000` + ### Fixed - [#234](https://github.com/nf-core/sarek/pull/234) - Switching to DSL2 @@ -22,15 +1498,124 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#241](https://github.com/nf-core/sarek/pull/241), [#248](https://github.com/nf-core/sarek/pull/248), [#250](https://github.com/nf-core/sarek/pull/250), [#257](https://github.com/nf-core/sarek/pull/257), [#259](https://github.com/nf-core/sarek/pull/259) - Add modules and sub workflow for preprocessing - [#242](https://github.com/nf-core/sarek/pull/242), 
[#244](https://github.com/nf-core/sarek/pull/244), [#245](https://github.com/nf-core/sarek/pull/245), [#246](https://github.com/nf-core/sarek/pull/246), [#247](https://github.com/nf-core/sarek/pull/247), [#249](https://github.com/nf-core/sarek/pull/249), [#252](https://github.com/nf-core/sarek/pull/252), [#256](https://github.com/nf-core/sarek/pull/256), [#263](https://github.com/nf-core/sarek/pull/263), [#264](https://github.com/nf-core/sarek/pull/264), [#283](https://github.com/nf-core/sarek/pull/283), [#285](https://github.com/nf-core/sarek/pull/285), [#338](https://github.com/nf-core/sarek/pull/338) - Refactor `dsl2` branch - [#257](https://github.com/nf-core/sarek/pull/257) - Use a params modules config file -- [#266](https://github.com/nf-core/sarek/pull/266), [#285](https://github.com/nf-core/sarek/pull/285), [#297](https://github.com/nf-core/sarek/pull/297) - Add modules and sub workflow for variant calling +- [#266](https://github.com/nf-core/sarek/pull/266), [#285](https://github.com/nf-core/sarek/pull/285), [#297](https://github.com/nf-core/sarek/pull/297) - Add modules and sub workflow for variant calling - [#333](https://github.com/nf-core/sarek/pull/333) - Bump `Sarek` version to `3.0dev` - [#334](https://github.com/nf-core/sarek/pull/334) - Sync `dsl2` and `dev` branches - [#342](https://github.com/nf-core/sarek/pull/342) - Update `README.md` +- [#386](https://github.com/nf-core/sarek/pull/386) - Annotation is back +- [#410](https://github.com/nf-core/sarek/pull/410), [#412](https://github.com/nf-core/sarek/pull/412), [#584](https://github.com/nf-core/sarek/pull/584) - Update `CI` tests +- [#418](https://github.com/nf-core/sarek/pull/418) - Fix `known_sites` channels +- [#432](https://github.com/nf-core/sarek/pull/432), [#457](https://github.com/nf-core/sarek/pull/457) - Sort before `tabix index` +- [#454](https://github.com/nf-core/sarek/pull/454) - Input is optional (can actually be found automatically by `Sarek` if previously run) +- 
[#463](https://github.com/nf-core/sarek/pull/463), [#468](https://github.com/nf-core/sarek/pull/468) - Fix `nf-core lint` +- [#513](https://github.com/nf-core/sarek/pull/513), [#527](https://github.com/nf-core/sarek/pull/527) - CNV is back +- [#529](https://github.com/nf-core/sarek/pull/529) - Do not save `versions.yml` files +- [#524](https://github.com/nf-core/sarek/pull/524) - Fix intervals usage by counting the actual list of scatter/gather files produced and not overall number of intervals +- [#549](https://github.com/nf-core/sarek/pull/549) - Fix unique lanes required for Freebayes: issue [#311](https://github.com/nf-core/sarek/issues/311), replaces `meta.clone()` with actual copy of map to avoid issues with +- [#567](https://github.com/nf-core/sarek/pull/567) - Fix interval name resolving during scatter/gather by moving logic to modules.config causing name to be correctly resolved on process execution; also fixed duplicate naming when variant callers produce multiple vcf files by adding field `type` to `meta` map +- [#585](https://github.com/nf-core/sarek/pull/585) - Fix Spark usage for GATK4 modules +- [#587](https://github.com/nf-core/sarek/pull/587) - Fix issue with VEP extra files +- [#581](https://github.com/nf-core/sarek/pull/581) - `TIDDIT` is back +- [#590](https://github.com/nf-core/sarek/pull/590) - Fix empty folders during scatter/gather +- [#592](https://github.com/nf-core/sarek/pull/592) - Fix optional resources for Mutect2, GetPileupSummaries, and HaplotypeCaller: issue [#299](https://github.com/nf-core/sarek/issues/299), [#359](https://github.com/nf-core/sarek/issues/359), [#367](https://github.com/nf-core/sarek/issues/367) +- [#598](https://github.com/nf-core/sarek/pull/598), [#614](https://github.com/nf-core/sarek/pull/614), [#626](https://github.com/nf-core/sarek/pull/626) - Remove WARNING message for config selector not matching +- [#599](https://github.com/nf-core/sarek/pull/599) - Add checks for correct data type for `params.step` +- 
[#599](https://github.com/nf-core/sarek/pull/599) - Add checks for no empty `--tools` with `--step variant_calling` or `--step annotate` +- [#600](https://github.com/nf-core/sarek/pull/600) - Remove `nf-core lint` warnings +- [#602](https://github.com/nf-core/sarek/pull/602) - Fixed bug in `alignment_to_fastq` and added tests +- [#609](https://github.com/nf-core/sarek/pull/609) - Remove unused intervals code, reorganize combined intervals file +- [#613](https://github.com/nf-core/sarek/pull/613) - Fixed filenames for `dbnsfp` and `SpliceAI` `VEP` plugin +- [#615](https://github.com/nf-core/sarek/pull/615) - Fix ASCAT igenomes file paths +- [#619](https://github.com/nf-core/sarek/pull/619) - Fix issue with checking samplesheet content with AWS +- [#628](https://github.com/nf-core/sarek/pull/628) - Fix issue with value converting to string before schema validation +- [#628](https://github.com/nf-core/sarek/pull/628) - Fix dbsnp check issue with `--step annotate` +- [#618](https://github.com/nf-core/sarek/pull/618) - Fix `bcftools/vcftools` sample labelling in multiqc report +- [#618](https://github.com/nf-core/sarek/pull/618) - Fix issue with tiddit [#621](https://github.com/nf-core/sarek/issues/621) +- [#618](https://github.com/nf-core/sarek/pull/618) - Fix channel issue with `targets.bed` in prepare_intervals +- [#634](https://github.com/nf-core/sarek/pull/634) - Fix issue with samtools/mosdepth plots in multiqc_report +- [#641](https://github.com/nf-core/sarek/pull/641) - Fix issue with duplicate substring in tools and skip_tools +- [#642](https://github.com/nf-core/sarek/pull/642) - Only unzip ref files if tool is run, only publish ref files if `--save_reference` and simplify CNVKit logic +- [#650](https://github.com/nf-core/sarek/pull/650) - Fix intervals checks +- [#654](https://github.com/nf-core/sarek/pull/654) - Allow any step but annotation to start from BAM files +- [#655](https://github.com/nf-core/sarek/pull/655) - Fix `--intervals false` logic & add 
versioning for local modules +- [#658](https://github.com/nf-core/sarek/pull/658) - Fix split fastq names in multiqc-report +- [#666](https://github.com/nf-core/sarek/pull/666) - Simplify multiqc config channel input +- [#668](https://github.com/nf-core/sarek/pull/668) - Add `snpeff_version` and `vep_version` to `schema_ignore_params` to avoid issue when specifying on command line +- [#669](https://github.com/nf-core/sarek/pull/669) - Fix path to files when creating csv files + +### Dependencies + +| Dependency | Old version | New version | +| ---------------------- | ----------- | ----------- | +| `ascat` | 2.5.2 | 3.0.0 | +| `bcftools` | 1.9 | 1.15.1 | +| `bwa-mem2` | 2.0 | 2.2.1 | +| `bwa` | 0.7.17 | unchanged | +| `cancerit-allelecount` | 4.0.2 | 4.3.0 | +| `cnvkit` | 0.9.6 | 0.9.9 | +| `control-freec` | 11.6 | unchanged | +| `deepvariant` | added | 1.3.0 | +| `dragmap` | added | 1.2.1 | +| `ensembl-vep` | 99.2 | 106.1 | +| `fastp` | added | 0.23.2 | +| `fastqc` | 0.11.9 | unchanged | +| `fgbio` | 1.1.0 | 2.0.2 | +| `freebayes` | 1.3.2 | 1.3.5 | +| `gatk4` | 4.1.7.0 | 4.2.6.1 | +| `gawk` | added | 5.1.0 | +| `genesplicer` | 1.0 | removed | +| `htslib` | 1.9 | removed | +| `llvm-openmp` | 8.0.1 | removed | +| `manta` | 1.6.0 | unchanged | +| `markdown` | 3.1.1 | removed | +| `mosdepth` | 0.3.3 | unchanged | +| `msisensor-pro` | 1.1.a | 1.2.0 | +| `msisensor` | 0.5 | removed | +| `multiqc` | 1.8 | 1.13a | +| `openjdk` | added | 8.0.312 | +| `openmp` | 8.0.1 | removed | +| `p7zip` | added | 15.09 | +| `pigz` | 2.3.4 | unchanged | +| `pygments` | 2.5.2 | removed | +| `pymdown-extensions` | 6.0 | removed | +| `qualimap` | 2.2.2d | removed | +| `r-ggplot2` | 3.3.0 | removed | +| `samblaster` | 0.1.24 | 0.1.26 | +| `samtools` | 1.9 | 1.15.1 | +| `sed` | added | 4.7 | +| `snpeff` | 4.3.1t | 5.1 | +| `strelka` | 2.9.10 | unchanged | +| `svdb` | added | 2.6.1 | +| `tabix` | added | 1.11 | +| `tiddit` | 2.7.1 | 3.1.0 | +| `trim-galore` | 0.6.5 | removed | +| `vcfanno` | 
0.3.2 | removed | +| `vcftools` | 0.1.16 | unchanged | ### Deprecated ### Removed +- [#485](https://github.com/nf-core/sarek/pull/485) - `--skip_qc`, `--skip_markduplicates` and `--skip_bqsr` is now `--skip_tools` +- [#538](https://github.com/nf-core/sarek/pull/538) - `--sequencing_center` is now `--seq_center` +- [#538](https://github.com/nf-core/sarek/pull/538) - `--markdup_java_options` has been removed +- [#539](https://github.com/nf-core/sarek/pull/539) - `--annotate_tools` has been removed +- [#539](https://github.com/nf-core/sarek/pull/539) - `--cadd_cache`, `--cadd_indels`, `--cadd_indels_tbi`, `--cadd_wg_snvs`, `--cadd_wg_snvs_tbi` have been removed +- [#539](https://github.com/nf-core/sarek/pull/539) - `--genesplicer` has been removed +- [#539](https://github.com/nf-core/sarek/pull/539) - `conf/genomes.config` and `params.genomes_base` have been removed +- [#562](https://github.com/nf-core/sarek/pull/562) - Restart from `--step annotate` from folder is removed. Use a `csv` file instead +- [#571](https://github.com/nf-core/sarek/pull/571) - Removed the local module `concat_vcf` +- [#605](https://github.com/nf-core/sarek/pull/605) - Removed Scatter/gather from GATK_SINGLE_SAMPLE_GERMLINE_VARIANT_CALLING, all intervals are processed together +- [#643](https://github.com/nf-core/sarek/pull/643) - Removed Sentieon parameters + +## [2.7.2](https://github.com/nf-core/sarek/releases/tag/2.7.2) - Áhkká + +Áhkká is one of the massifs just outside of the Sarek National Park. + +### Fixed + +- [#566](https://github.com/nf-core/sarek/pull/566) - Fix caching bug affecting a variable number of `MapReads` jobs due to non-deterministic state of `statusMap` during caching evaluation + ## [2.7.1](https://github.com/nf-core/sarek/releases/tag/2.7.1) - Pårtejekna Pårtejekna is one of glaciers of the Pårte Massif. @@ -157,7 +1742,7 @@ Piellorieppe is one of the main massif in the Sarek National Park. 
- [#180](https://github.com/nf-core/sarek/pull/180) - Now saving Mapped BAMs (and creating TSV) in minimal setting - [#182](https://github.com/nf-core/sarek/pull/182) - Add possibility to run `HaplotypeCaller` without `dbsnp` so it can be used to actually generate vcfs to build a set of known sites (cf [gatkforums](https://gatkforums.broadinstitute.org/gatk/discussion/1247/what-should-i-use-as-known-variants-sites-for-running-tool-x)) - [#195](https://github.com/nf-core/sarek/pull/195) - Now creating TSV for duplicates marked BAMs in minimal setting -- [#195](https://github.com/nf-core/sarek/pull/195), [#202](https://github.com/nf-core/sarek/pull/202) - Add `--save_bam_mapped` params to save mapped BAMs. +- [#195](https://github.com/nf-core/sarek/pull/195), [#202](https://github.com/nf-core/sarek/pull/202) - Add `--save_bam_mapped` params to save mapped BAMs - [#197](https://github.com/nf-core/sarek/pull/197) - Add step `prepare_recalibration` to allow restart from DuplicatesMarked BAMs - [#204](https://github.com/nf-core/sarek/pull/204) - Add step `Control-FREEC` to allow restart from pileup files - [#205](https://github.com/nf-core/sarek/pull/205) - Add `--skip_markduplicates` to allow skipping the `MarkDuplicates` process @@ -319,9 +1904,9 @@ Jåkkåtjkaskajekna is one of the two glaciers of the Ålkatj Massif. ### Fixed -- [#48](https://github.com/nf-core/sarek/issues/48) - Fix `singularity.autoMounts` issue. -- [#49](https://github.com/nf-core/sarek/issues/49) - Use correct tag for annotation containers. -- [#50](https://github.com/nf-core/sarek/issues/50) - Fix paths for scripts. 
+- [#48](https://github.com/nf-core/sarek/issues/48) - Fix `singularity.autoMounts` issue +- [#49](https://github.com/nf-core/sarek/issues/49) - Use correct tag for annotation containers +- [#50](https://github.com/nf-core/sarek/issues/50) - Fix paths for scripts ## [2.5](https://github.com/nf-core/sarek/releases/tag/2.5) - Ålkatj @@ -377,7 +1962,7 @@ Initial release of `nf-core/sarek`, created with the [nf-core](http://nf-co.re/) - [#7](https://github.com/nf-core/sarek/pull/8), [#23](https://github.com/nf-core/sarek/pull/23) - `--annotateVCF` is now deprecated, use `--input` instead - [#8](https://github.com/nf-core/sarek/pull/8), [#12](https://github.com/nf-core/sarek/pull/12) - Improve helper script `build.nf` for downloading and building reference files - [#9](https://github.com/nf-core/sarek/pull/9) - `ApplyBQSR` is now parallelized -- [#9](https://github.com/nf-core/sarek/pull/9) - Fastq files are named following "${idRun}_R1.fastq.gz" in the `FastQC` output for easier reporting +- [#9](https://github.com/nf-core/sarek/pull/9) - Fastq files are named following "${idRun}\_R1.fastq.gz" in the `FastQC` output for easier reporting - [#9](https://github.com/nf-core/sarek/pull/9) - Status is now a map with `idpatient`, `idsample` as keys (ie: `status = statusMap[idPatient, idSample]`) - [#9](https://github.com/nf-core/sarek/pull/9) - Use `ensembl-vep` `95.2` instead of `96.0` - [#11](https://github.com/nf-core/sarek/pull/11) - Summary HTML from `VEP` is now in the `Reports` directory @@ -394,7 +1979,7 @@ Initial release of `nf-core/sarek`, created with the [nf-core](http://nf-co.re/) - [#18](https://github.com/nf-core/sarek/pull/18), [#29](https://github.com/nf-core/sarek/pull/29) - `--noReports` is now `--skipQC all` - [#18](https://github.com/nf-core/sarek/pull/18), [#21](https://github.com/nf-core/sarek/pull/21) - Update logo - [#21](https://github.com/nf-core/sarek/pull/21) - Moved `smallGRCh37` path to `genomes.config` -- 
[#23](https://github.com/nf-core/sarek/pull/23) - Rename `genomeFile`, `genomeIndex` and `genomeDict` by `fasta`, `fastaFai` and `dict` +- [#23](https://github.com/nf-core/sarek/pull/23) - Rename `genomeFile`, `genomeIndex` and `genomeDict` by `fasta`, `fastaFai` and `dict` - [#23](https://github.com/nf-core/sarek/pull/23) - `--sample` is now deprecated, use `--input` instead - [#23](https://github.com/nf-core/sarek/pull/23) - `--genomeFile` is now deprecated, use `--fasta` instead - [#23](https://github.com/nf-core/sarek/pull/23) - `--genomeIndex` is now deprecated, use `--fastaFai` instead @@ -529,7 +2114,7 @@ Initial release of `nf-core/sarek`, created with the [nf-core](http://nf-co.re/) ### Added - [#671](https://github.com/SciLifeLab/Sarek/pull/671) - New `publishDirMode` param and docs -- [#673](https://github.com/SciLifeLab/Sarek/pull/673), [#675](https://github.com/SciLifeLab/Sarek/pull/675), [#676](https://github.com/SciLifeLab/Sarek/pull/676) - Profiles for BinAC and CFC clusters in Tübingen +- [#673](https://github.com/SciLifeLab/Sarek/pull/673), [#675](https://github.com/SciLifeLab/Sarek/pull/675), [#676](https://github.com/SciLifeLab/Sarek/pull/676) - Profiles for BinAC and CFC clusters in Tübingen - [#679](https://github.com/SciLifeLab/Sarek/pull/679) - Add container for `CreateIntervalBeds` - [#692](https://github.com/SciLifeLab/Sarek/pull/692), [#697](https://github.com/SciLifeLab/Sarek/pull/697) - Add `AWS iGenomes` possibilities (within `conf/igenomes.conf`) - [#694](https://github.com/SciLifeLab/Sarek/pull/694) - Add monochrome and grey logos for light or dark background @@ -553,7 +2138,7 @@ Initial release of `nf-core/sarek`, created with the [nf-core](http://nf-co.re/) ### Fixed - [#665](https://github.com/SciLifeLab/Sarek/pull/665) - Input bam file now has always the same name (whether it is from a single fastq pair or multiple) in the `MarkDuplicates` process, so metrics too -- [#672](https://github.com/SciLifeLab/Sarek/pull/672) - Process 
`PullSingularityContainers` from `buildContainers.nf` now expect a file with the correct `.simg` extension for singularity images, and no longer the `.img` one. +- [#672](https://github.com/SciLifeLab/Sarek/pull/672) - Process `PullSingularityContainers` from `buildContainers.nf` now expect a file with the correct `.simg` extension for singularity images, and no longer the `.img` one - [#679](https://github.com/SciLifeLab/Sarek/pull/679) - Add `publishDirMode` for `germlineVC.nf` - [#700](https://github.com/SciLifeLab/Sarek/pull/700) - Fix [#699](https://github.com/SciLifeLab/Sarek/issues/699) missing DP in the FORMAT column VCFs for Mutect2 - [#702](https://github.com/SciLifeLab/Sarek/pull/702) - Fix [#701](https://github.com/SciLifeLab/Sarek/issues/701) @@ -629,7 +2214,7 @@ Ruotes is one of the main massif in the Sarek National Park. - [#556](https://github.com/SciLifeLab/Sarek/pull/556) - `Strelka` Best Practices - [#563](https://github.com/SciLifeLab/Sarek/pull/563) - Use `SnpEFF` reports in `MultiQC` - [#568](https://github.com/SciLifeLab/Sarek/pull/568) - `VCFTools` process `RunVcftools` for QC -- [#574](https://github.com/SciLifeLab/Sarek/pull/574), [#580](https://github.com/SciLifeLab/Sarek/pull/580) - Abstracts for `NPMI`, `JOBIM` and `EACR25` +- [#574](https://github.com/SciLifeLab/Sarek/pull/574), [#580](https://github.com/SciLifeLab/Sarek/pull/580) - Abstracts for `NPMI`, `JOBIM` and `EACR25` - [#577](https://github.com/SciLifeLab/Sarek/pull/577) - New repository for testing: [Sarek-data](https://github.com/SciLifeLab/Sarek-data) - [#595](https://github.com/SciLifeLab/Sarek/pull/595) - New library `QC` for functions `bamQC`, `bcftools`, `samtoolsStats`, `vcftools`, `getVersionBCFtools`, `getVersionGATK`, `getVersionManta`, `getVersionSnpEFF`, `getVersionStrelka`, `getVersionVCFtools`, `getVersionVEP` - [#595](https://github.com/SciLifeLab/Sarek/pull/595) - New Processes `GetVersionBCFtools`, `GetVersionGATK`, `GetVersionManta`, `GetVersionSnpEFF`, 
`GetVersionStrelka`, `GetVersionVCFtools`, `GetVersionVEP` @@ -651,17 +2236,17 @@ Ruotes is one of the main massif in the Sarek National Park. - [#582](https://github.com/SciLifeLab/Sarek/pull/582), [#587](https://github.com/SciLifeLab/Sarek/pull/587) - Update figures - [#595](https://github.com/SciLifeLab/Sarek/pull/595) - Function `defineDirectoryMap()` is now part of `SarekUtils` - [#595](https://github.com/SciLifeLab/Sarek/pull/595) - Process `GenerateMultiQCconfig` replace by function `createMultiQCconfig()` -- [#597](https://github.com/SciLifeLab/Sarek/pull/597) - `extractBams()` now takes an extra parameter. +- [#597](https://github.com/SciLifeLab/Sarek/pull/597) - `extractBams()` now takes an extra parameter - [#597](https://github.com/SciLifeLab/Sarek/pull/597) - Move `checkFileExtension()`, `checkParameterExistence()`, `checkParameterList()`, `checkReferenceMap()`, `checkRefExistence()`, `extractBams()`, `extractGenders()`, `returnFile()`, `returnStatus()` and `returnTSV()` functions to `SarekUtils` - [#597](https://github.com/SciLifeLab/Sarek/pull/597) - Reduce data footprint for Process `CreateRecalibrationTable` -- [#597](https://github.com/SciLifeLab/Sarek/pull/597) - Replace deprecated operator `phase` by `join`. 
+- [#597](https://github.com/SciLifeLab/Sarek/pull/597) - Replace deprecated operator `phase` by `join` - [#599](https://github.com/SciLifeLab/Sarek/pull/599) - Merge is tested with `ANNOTATEALL` - [#604](https://github.com/SciLifeLab/Sarek/pull/604) - Synching `GRCh38` `wgs_calling_regions` bedfiles - [#607](https://github.com/SciLifeLab/Sarek/pull/607) - One container approach - [#607](https://github.com/SciLifeLab/Sarek/pull/607) - Update to `GATK4` - [#608](https://github.com/SciLifeLab/Sarek/pull/608) - Update `Nextflow` required version - [#616](https://github.com/SciLifeLab/Sarek/pull/616) - Update `CHANGELOG` -- [#617](https://github.com/SciLifeLab/Sarek/pull/617) - Replace deprecated `Nextflow ``$name` syntax with `withName` +- [#617](https://github.com/SciLifeLab/Sarek/pull/617) - Replace deprecated `Nextflow` `$name` syntax with `withName` ### Fixed @@ -670,7 +2255,7 @@ Ruotes is one of the main massif in the Sarek National Park. - [#579](https://github.com/SciLifeLab/Sarek/pull/579), [#584](https://github.com/SciLifeLab/Sarek/pull/584) - `Manta` output reorganized after modification for `Strelka Best Practices` process - [#585](https://github.com/SciLifeLab/Sarek/pull/583) - Trace file is plain txt - [#590](https://github.com/SciLifeLab/Sarek/pull/590), [#593](https://github.com/SciLifeLab/Sarek/pull/593) - Fix `Singularity` installation in `Travis CI` testing -- [#598](https://github.com/SciLifeLab/Sarek/pull/598), [#601](https://github.com/SciLifeLab/Sarek/pull/601) - Fixes for `Python` script `selectROI.py` to work with `CLC` viewer +- [#598](https://github.com/SciLifeLab/Sarek/pull/598), [#601](https://github.com/SciLifeLab/Sarek/pull/601) - Fixes for `Python` script `selectROI.py` to work with `CLC` viewer ### Removed @@ -678,7 +2263,7 @@ Ruotes is one of the main massif in the Sarek National Park. 
## [2.0.0](https://github.com/SciLifeLab/Sarek/releases/tag/2.0.0) - 2018-03-23 -First release under the `Sarek` name, from the National Park in Northern Sweden +First release under the `Sarek` name, from the National Park in Northern Sweden. ### Added diff --git a/CITATIONS.md b/CITATIONS.md index 35cd2ba26d..6bc1eaf62b 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -1,5 +1,9 @@ # nf-core/sarek: Citations +## [nf-core/sarek 3](https://www.biorxiv.org/content/10.1101/2023.07.19.549462v2) + +> Hanssen F, Garcia MU, Folkersen L, Pedersen AS, Lescai F, Jodoin S, Miller E, Wacker O, Smith N, nf-core community, Gabernet G, Nahnsen S. Scalable and efficient DNA sequencing analysis on different compute infrastructures aiding variant discovery. bioRxiv. 2023 Jul 19:2023-07. + ## [nf-core/sarek](https://pubmed.ncbi.nlm.nih.gov/32269765/) > Garcia MU, Juhos S, Larsson M, Olason PI, Martin M, Eisfeldt J, DiLorenzo S, Sandgren J, Díaz De Ståhl T, Ewels PA, Wirta V, Nistér M, Käller M, Nystedt B. Sarek: A portable workflow for whole-genome sequencing analysis of germline and somatic variants. F1000Res. 2020 Jan 29;9:63. eCollection 2020. doi: 10.12688/f1000research.16665.2. PubMed PMID: 32269765. @@ -14,78 +18,202 @@ ## Pipeline tools -* [ASCAT](https://pubmed.ncbi.nlm.nih.gov/20837533/) - > Van Loo P, Nordgard SH, Lingjærde OC, et al.: Allele-specific copy number analysis of tumors. Proc Natl Acad Sci USA . 2010 Sep 28;107(39):16910-5. doi: 10.1073/pnas.1009843107. Epub 2010 Sep 13. PubMed PMID: 20837533; PubMed Central PMCID: PMC2947907. +- [ASCAT](https://pubmed.ncbi.nlm.nih.gov/20837533/) + + > Van Loo P, Nordgard SH, Lingjærde OC, et al.: Allele-specific copy number analysis of tumors. Proc Natl Acad Sci USA . 2010 Sep 28;107(39):16910-5. doi: 10.1073/pnas.1009843107. PubMed PMID: 20837533; PubMed Central PMCID: PMC2947907. 
+ +- [alleleCount](https://github.com/cancerit/alleleCount) + +- [BCFTools](https://pubmed.ncbi.nlm.nih.gov/21903627/) + + > Li H: A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics. 2011 Nov 1;27(21):2987-93. doi: 10.1093/bioinformatics/btr509. PubMed PMID: 21903627; PubMed Central PMCID: PMC3198575. + +- [BGZip](https://github.com/madler/pigz) + +- [BWA-MEM](https://arxiv.org/abs/1303.3997v2) + + > Li H: Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. arXiv 2013. doi: 10.48550/arXiv.1303.3997 + +- [BWA-MEM2](https://ieeexplore.ieee.org/document/8820962) + + > M. Vasimuddin, S. Misra, H. Li and S. Aluru, "Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems," 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS), 2019, pp. 314-324. doi: 10.1109/IPDPS.2019.00041. + +- [CNVKIT](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004873) + + > Talevich E, Shain AH, Botton T, Bastian BC (2016) CNVkit: Genome-Wide Copy Number Detection and Visualization from Targeted DNA Sequencing. PLoS Comput Biol 12(4): e1004873. doi: 10.1371/journal.pcbi.1004873. PubMed PMID: 27100738. PubMed Central PMCID: PMC4839673. + +- [Condel](https://pubmed.ncbi.nlm.nih.gov/21457909/) + + > González-Pérez A, López-Bigas N. Improving the assessment of the outcome of nonsynonymous SNVs with a consensus deleteriousness score, Condel. Am J Hum Genet. 2011 Apr 8;88(4):440-9. doi: 10.1016/j.ajhg.2011.03.004. PubMed PMID: 21457909; PubMed Central PMCID: PMC3071923. + +- [Control-FREEC](https://pubmed.ncbi.nlm.nih.gov/22155870/) + + > Boeva V, Popova T, Bleakley K, et al.: Control-FREEC: a tool for assessing copy number and allelic content using next-generation sequencing data. Bioinformatics. 2012; 28(3): 423–5. doi: 10.1093/bioinformatics/btr670. Epub 2011 Dec 6. 
PubMed PMID: 22155870; PubMed Central PMCID: PMC3268243. + +- [dbNSFP](https://pubmed.ncbi.nlm.nih.gov/33261662/) + + > Liu X, et al.: dbNSFP v4: a comprehensive database of transcript-specific functional predictions and annotations for human nonsynonymous and splice-site SNVs. Genome Med. 2020 Dec 2;12(1):103. doi: 10.1186/s13073-020-00803-9. PubMed PMID: 33261662; PubMed Central PMCID: PMC7709417. + +- [DeepVariant](https://www.nature.com/articles/nbt.4235) + + > Poplin, R., Chang, PC., Alexander, D. et al. A universal SNP and small-indel variant caller using deep neural networks. Nat Biotechnol 36, 983–987 (2018). doi: 10.1038/nbt.4235. + +- [DragMap](https://github.com/Illumina/DRAGMAP) + +- [EnsemblVEP](https://pubmed.ncbi.nlm.nih.gov/27268795/) + + > McLaren W, Gil L, Hunt SE, et al.: The Ensembl Variant Effect Predictor. Genome Biol. 2016 Jun 6;17(1):122. doi: 10.1186/s13059-016-0974-4. PubMed PMID: 27268795; PubMed Central PMCID: PMC4893825. + +- [FastP](https://academic.oup.com/bioinformatics/article/34/17/i884/5093234) + + > Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu, fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics, Volume 34, Issue 17, 01 September 2018, Pages i884–i890, doi: 10.1093/bioinformatics/bty560. PubMed PMID: 30423086. PubMed Central PMCID: PMC6129281 + +- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. + +- [FGBio](https://github.com/fulcrumgenomics/fgbio) + + > doi: 10.5281/zenodo.10456900 + +- [FreeBayes](https://arxiv.org/abs/1207.3907) + + > Garrison E, Marth G. Haplotype-based variant detection from short-read sequencing. arXiv preprint arXiv:1207.3907 [q-bio.GN] 2012. doi: 10.48550/arXiv.1207.3907 + +- [GATK](https://pubmed.ncbi.nlm.nih.gov/20644199/) + + > McKenna A, Hanna M, Banks E, et al.: The Genome Analysis Toolkit: a MapReduce framework for analyzing next-generation DNA sequencing data. 
Genome Res. 2010 Sep;20(9):1297-303. doi: 10.1101/gr.107524.110. Epub 2010 Jul 19. PubMed PMID: 20644199; PubMed Central PMCID: PMC2928508. + +- [GNU sed](http://www.gnu.org/software/sed/) + +- [goleft indexcov](https://pubmed.ncbi.nlm.nih.gov/29048539/) + + > Pedersen BS, Collins RL, Talkowski ME, Quinlan AR. Indexcov: fast coverage quality control for whole-genome sequencing. Gigascience. 2017 Nov 1;6(11):1-6. doi: 10.1093/gigascience/gix090. PubMed PMID: 29048539; PubMed Central PMCID: PMC5737511. + +- [HaplotypeCaller Joint Germline](https://www.biorxiv.org/content/10.1101/201178v3) -* [BCFTools](https://pubmed.ncbi.nlm.nih.gov/21903627/) - > Li H: A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics. 2011 Nov 1;27(21):2987-93. doi: 10.1093/bioinformatics/btr509. Epub 2011 Sep 8. PubMed PMID: 21903627; PubMed Central PMCID: PMC3198575. + > Poplin R. et al, Scaling accurate genetic variant discovery to tens of thousands of samples, bioRxiv 2018. doi: 10.1101/201178 -* [BWA-MEM](https://arxiv.org/abs/1303.3997v2) - > Li H: Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. arXiv 1303.3997v2. 2013 +- [LOFTEE](https://pubmed.ncbi.nlm.nih.gov/32461654/) -* [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + > Karczewski KJ, et al.: The mutational constraint spectrum quantified from variation in 141,456 humans. Nature. 2020 May;581(7809):434-443. doi: 10.1038/s41586-020-2308-7. PubMed PMID: 32461654; PubMed Central PMCID: PMC7334197. -* [Control-FREEC](https://pubmed.ncbi.nlm.nih.gov/22155870/) - > Boeva V, Popova T, Bleakley K, et al.: Control-FREEC: a tool for assessing copy number and allelic content using next-generation sequencing data. Bioinformatics. 2012; 28(3): 423–5. doi: 10.1093/bioinformatics/btr670. Epub 2011 Dec 6. PubMed PMID: 22155870; PubMed Central PMCID: PMC3268243. 
+- [Manta](https://pubmed.ncbi.nlm.nih.gov/26647377/) -* [GATK](https://pubmed.ncbi.nlm.nih.gov/20644199/) - > McKenna A, Hanna M, Banks E, et al.: The Genome Analysis Toolkit: a MapReduce framework for analyzing next-generation DNA sequencing data. Genome Res. 2010 Sep;20(9):1297-303. doi: 10.1101/gr.107524.110. Epub 2010 Jul 19. PubMed PMID: 20644199; PubMed Central PMCID: PMC2928508. + > Chen X, Schulz-Trieglaff O, Shaw R, et al.: Manta: rapid detection of structural variants and indels for germline and cancer sequencing applications. Bioinformatics. 2016 Apr 15;32(8):1220-2. doi: 10.1093/bioinformatics/btv710. PubMed PMID: 26647377. -* [Manta](https://pubmed.ncbi.nlm.nih.gov/26647377/) - > Chen X, Schulz-Trieglaff O, Shaw R, et al.: Manta: rapid detection of structural variants and indels for germline and cancer sequencing applications. Bioinformatics. 2016 Apr 15;32(8):1220-2. doi: 10.1093/bioinformatics/btv710. Epub 2015 Dec 8. PubMed PMID: 26647377. +- [Mastermind](https://pubmed.ncbi.nlm.nih.gov/33281875/) -* [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) - > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + > Chunn LM, Nefcy DC, Scouten RW, Tarpey RP, Chauhan G, Lim MS, Elenitoba-Johnson KSJ, Schwartz SA, Kiel MJ. Mastermind: A Comprehensive Genomic Association Search Engine for Empirical Evidence Curation and Genetic Variant Interpretation. Front Genet. 2020 Nov 13;11:577152. doi: 10.3389/fgene.2020.577152. PubMed PMID: 33281875; PubMed Central PMCID: PMC7691534. -* [Qualimap 2](https://pubmed.ncbi.nlm.nih.gov/26428292/) - > Okonechnikov K, Conesa A, García-Alcalde F. Qualimap 2: advanced multi-sample quality control for high-throughput sequencing data. Bioinformatics. 2016 Jan 15;32(2):292-4. 
doi: 10.1093/bioinformatics/btv566. Epub 2015 Oct 1. PubMed PMID: 26428292; PubMed Central PMCID: PMC4708105. +- [Mosdepth](https://academic.oup.com/bioinformatics/article/34/5/867/4583630) -* [SAMtools](https://pubmed.ncbi.nlm.nih.gov/19505943/) - > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + > Brent S Pedersen, Aaron R Quinlan, Mosdepth: quick coverage calculation for genomes and exomes, Bioinformatics, Volume 34, Issue 5, 01 March 2018, Pages 867–868. doi: 10.1093/bioinformatics/btx699. PubMed PMID: 29096012. PubMed Central PMCID: PMC6030888. -* [snpEff](https://pubmed.ncbi.nlm.nih.gov/22728672/) - > Cingolani P, Platts A, Wang le L, et al.: A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3. Fly (Austin). Apr-Jun 2012;6(2):80-92. doi: 10.4161/fly.19695. PubMed PMID: 22728672; PubMed Central PMCID: PMC3679285. +- [MSIsensor2](https://github.com/niu-lab/msisensor2) -* [Strelka2](https://pubmed.ncbi.nlm.nih.gov/30013048/) - > Kim S, Scheffler K, Halpern AL, et al.: Strelka2: fast and accurate calling of germline and somatic variants. Nat Methods. 2018 Aug;15(8):591-594. doi: 10.1038/s41592-018-0051-x. Epub 2018 Jul 16. PubMed PMID: 30013048. +- [MSISensorPro](https://www.sciencedirect.com/science/article/pii/S1672022920300218) -* [TIDDIT](https://pubmed.ncbi.nlm.nih.gov/28781756/) - > Eisfeldt J, Vezzi F, Olason P, et al.: TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Res. 2017 May 10;6:664. doi: 10.12688/f1000research.11168.2. eCollection 2017. 
PubMed PMID: 28781756; PubMed Central PMCID: PMC5521161. + > Peng Jia, Xiaofei Yang, Li Guo, Bowen Liu, Jiadong Lin, Hao Liang, et al. MSIsensor-pro: fast, accurate, and matched-normal-sample-free detection of microsatellite instability. Genomics Proteomics Bioinformatics 2020,18(1). doi: 10.1016/j.gpb.2020.02.001. PubMed PMID: 32171661. PubMed Central PMCID: PMC7393535. -* [Trim Galore!](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) +- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) -* [VCFTools](https://pubmed.ncbi.nlm.nih.gov/21653522/) - > Danecek P, Auton A, Abecasis G, et al.: The variant call format and VCFtools. Bioinformatics. 2011 Aug 1;27(15):2156-8. doi: 10.1093/bioinformatics/btr330. Epub 2011 Jun 7. PubMed PMID: 21653522; PubMed Central PMCID: PMC3137218. + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. -* [VEP](https://pubmed.ncbi.nlm.nih.gov/27268795/) - > McLaren W, Gil L, Hunt SE, et al.: The Ensembl Variant Effect Predictor. Genome Biol. 2016 Jun 6;17(1):122. doi: 10.1186/s13059-016-0974-4. PubMed PMID: 27268795; PubMed Central PMCID: PMC4893825. +- [NGSCheckMate](https://pubmed.ncbi.nlm.nih.gov/28369524/) + + > Lee S, Lee S, Ouellette S, Park WY, Lee EA, Park PJ. NGSCheckMate: software for validating sample identity in next-generation sequencing studies within and across data types. Nucleic Acids Res. 2017 Jun 20;45(11):e103. doi: 10.1093/nar/gkx193. PubMed PMID: 28369524; PubMed Central PMCID: PMC5499645. + +- [NVIDIA Clara Parabricks](https://docs.nvidia.com/clara/parabricks/latest/index.html) + + > NVIDIA (2025). Clara Parabricks (v.4.4.0-1 and above). 
+ +- [Phenotypes](https://github.com/Ensembl/VEP_plugins/blob/release/115/Phenotypes.pm) + +- [PIGZ](https://zlib.net/pigz/) + +- [P7Zip](http://p7zip.sourceforge.net/) + +- [SAMtools](https://pubmed.ncbi.nlm.nih.gov/19505943/) + + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + +- [snpEff](https://pubmed.ncbi.nlm.nih.gov/22728672/) + + > Cingolani P, Platts A, Wang le L, et al.: A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3. Fly (Austin). Apr-Jun 2012;6(2):80-92. doi: 10.4161/fly.19695. PubMed PMID: 22728672; PubMed Central PMCID: PMC3679285. + +- [SpliceAI](https://pubmed.ncbi.nlm.nih.gov/30661751/) + + > Jaganathan K, et al.: Predicting Splicing from Primary Sequence with Deep Learning. Cell. 2019 Jan 24;176(3):535-548.e24. doi: 10.1016/j.cell.2018.12.015. PubMed PMID: 30661751. + +- [SpliceRegion](https://github.com/Ensembl/VEP_plugins/blob/release/106/SpliceRegion.pm) + +- [SPRING](https://pubmed.ncbi.nlm.nih.gov/30535063/) + + > Chandak S, Tatwawadi K, Ochoa I, Hernaez M, Weissman T. SPRING: a next-generation compressor for FASTQ data. Bioinformatics. 2019 Aug 1;35(15):2674-2676. doi: 10.1093/bioinformatics/bty1015. PubMed PMID: 30535063; PubMed Central PMCID: PMC6662292. + +- [Strelka2](https://pubmed.ncbi.nlm.nih.gov/30013048/) + + > Kim S, Scheffler K, Halpern AL, et al.: Strelka2: fast and accurate calling of germline and somatic variants. Nat Methods. 2018 Aug;15(8):591-594. doi: 10.1038/s41592-018-0051-x. Epub 2018 Jul 16. PubMed PMID: 30013048. 
+ +- [SVDB](https://github.com/J35P312/SVDB) + +- [Tabix](https://academic.oup.com/bioinformatics/article/27/5/718/262743) + + > Li H, Tabix: fast retrieval of sequence features from generic TAB-delimited files, Bioinformatics, Volume 27, Issue 5, 1 March 2011, Pages 718–719, doi: 10.1093/bioinformatics/btq671. PubMed PMID: 21208982. PubMed Central PMCID: PMC3042176. + +- [TIDDIT](https://pubmed.ncbi.nlm.nih.gov/28781756/) + + > Eisfeldt J, Vezzi F, Olason P, et al.: TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Res. 2017 May 10;6:664. doi: 10.12688/f1000research.11168.2. eCollection 2017. PubMed PMID: 28781756; PubMed Central PMCID: PMC5521161. + +- [Varlociraptor](https://pubmed.ncbi.nlm.nih.gov/32345333/) + + > Köster, J., Dijkstra, L.J., Marschall, T. et al. Varlociraptor: enhancing sensitivity and controlling false discovery rate in somatic indel discovery. Genome Biol 21, 98 (2020). doi: 10.1186/s13059-020-01993-6. + +- [VCFTools](https://pubmed.ncbi.nlm.nih.gov/21653522/) + + > Danecek P, Auton A, Abecasis G, et al.: The variant call format and VCFtools. Bioinformatics. 2011 Aug 1;27(15):2156-8. doi: 10.1093/bioinformatics/btr330. Epub 2011 Jun 7. PubMed PMID: 21653522; PubMed Central PMCID: PMC3137218. + +- [vcflib](https://pubmed.ncbi.nlm.nih.gov/35639788/) + + > Garrison E, Kronenberg ZN, Dawson ET, Pedersen BS, Prins P. A spectrum of free software tools for processing the VCF variant call format: vcflib, bio-vcf, cyvcf2, hts-nim and slivar. PLoS Comput Biol. 2022 May 31;18(5):e1009123. doi: 10.1371/journal.pcbi.1009123. PubMed PMID: 35639788; PubMed Central PMCID: PMC9286226. + +- [Lofreq](https://pubmed.ncbi.nlm.nih.gov/23066108/) + + > Wilm et al. LoFreq: A sequence-quality aware, ultra-sensitive variant caller for uncovering cell-population heterogeneity from high-throughput sequencing datasets. Nucleic Acids Res. 2012; 40(22):11189-201.
+ +- [MuSE](https://pubmed.ncbi.nlm.nih.gov/38589250/) + + > Ji S, Zhu T, Sethia A, Wang W. Accelerated somatic mutation calling for whole-genome and whole-exome sequencing data from heterogenous tumor samples. Genome Res. 2024 May 15;34(4):633-641. doi: 10.1101/gr.278456.123. PMID: 38589250; PMCID: PMC11146589. ## R packages -* [R](https://www.R-project.org/) - > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. +- [R](https://www.R-project.org/) -* [ggplot2](https://cran.r-project.org/web/packages/ggplot2/index.html) - > H. Wickham. ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York, 2016. + > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. -* [optparse](https://CRAN.R-project.org/package=optparse) - > Trevor L Davis (2018). optparse: Command Line Option Parser. +- [RColorBrewer](https://CRAN.R-project.org/package=RColorBrewer) -* [RColorBrewer](https://CRAN.R-project.org/package=RColorBrewer) - > Erich Neuwirth (2014). RColorBrewer: ColorBrewer Palettes. + > Erich Neuwirth (2014). RColorBrewer: ColorBrewer Palettes. ## Software packaging/containerisation tools -* [Anaconda](https://anaconda.com) - > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. +- [Anaconda](https://anaconda.com) + + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) + + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. 
+ +- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) + + > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. -* [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) - > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. +- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) -* [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) - > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + > Merkel, D. 2014. Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. -* [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) +- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) -* [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) - > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. 
PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000000..228b6d8654 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,10 @@ +# nf-core/sarek + +Read `docs/DEVELOPER_GUIDELINES.md` before making any changes. + +## Agent-specific rules + +- Keep branches **local** — do NOT push unless explicitly asked +- Do not amend commits without asking +- **Don't ask for confirmation** on routine git operations (creating branches, committing) — just do it following the conventions in the guidelines +- Use `nf-core` tools from the conda environment (`conda activate nf-core`) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052f1f..c089ec78c4 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is 
therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. -We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. 
+We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. -Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. -We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. 
-The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. 
- Communicating with community members within the nf-core Slack channel. - Participating in hackathons organised by nf-core (both online and in-person events). -- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. - Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. 
If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. 
Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. -- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. 
This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. + +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable.
+- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. 
The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text.
+ +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/LICENSE b/LICENSE index 6060922966..96631d884e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) Maxime Garcia, Szilveszter Juhos +Copyright (c) The nf-core/sarek team Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 60a50311bc..151b88920a 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,27 @@ -# ![nf-core/sarek](docs/images/nf-core-sarek_logo_light.png#gh-light-mode-only) ![nf-core/sarek](docs/images/nf-core-sarek_logo_dark.png#gh-dark-mode-only) - -> **An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing** - -[![GitHub Actions CI Status](https://github.com/nf-core/sarek/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/sarek/actions?query=workflow%3A%22nf-core+CI%22) -[![GitHub Actions Linting Status](https://github.com/nf-core/sarek/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/sarek/actions?query=workflow%3A%22nf-core+linting%22) +

+ + + nf-core/sarek + +

+ +[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/sarek) +[![GitHub Actions CI Status](https://github.com/nf-core/sarek/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/sarek/actions/workflows/nf-test.yml) +[![GitHub Actions Linting Status](https://github.com/nf-core/sarek/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/sarek/actions/workflows/linting.yml) [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/sarek/results) -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.4945321-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.4945321) +[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3476425-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3476425) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg?labelColor=000000)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) +[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) 
+[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/sarek) [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23sarek-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/sarek) -[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core) +[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re) +[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core) [![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction @@ -22,121 +30,196 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! - +On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. 
This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/sarek/results). It's listed on [Elixir - Tools and Data Services Registry](https://bio.tools/nf-core-sarek) and [Dockstore](https://dockstore.org/workflows/github.com/nf-core/sarek). -## Pipeline summary +

+ +

-By default, the pipeline currently performs the following: +## Pipeline summary -* Sequencing quality control (`FastQC`) -* Map Reads to Reference (`BWA mem`) -* Mark Duplicates (`GATK MarkDuplicates`) -* Base (Quality Score) Recalibration (`GATK BaseRecalibrator`, `GATK ApplyBQSR`) -* Preprocessing quality control (`samtools stats`) -* Preprocessing quality control (`Qualimap bamqc`) -* Overall pipeline run summaries (`MultiQC`) +Depending on the options and samples provided, the pipeline can currently perform the following: + +- Form consensus reads from UMI sequences (`fgbio`) +- Sequencing quality control and trimming (enabled by `--trim_fastq`) (`FastQC`, `fastp`) +- Contamination removal (`BBSplit`, enabled by `--tools bbsplit`) +- Map Reads to Reference (`BWA-mem`, `BWA-mem2`, `dragmap` or `Sentieon BWA-mem`) +- Process BAM file (`GATK MarkDuplicates`, `GATK BaseRecalibrator` and `GATK ApplyBQSR` or `Sentieon LocusCollector` and `Sentieon Dedup`) +- _Experimental Feature_: Use GPU-accelerated parabricks implementation as alternative to "Map Reads to Reference" + "Process BAM file" (`--aligner parabricks`) +- Summarise alignment statistics (`samtools stats`, `mosdepth`) +- Variant calling (enabled by `--tools`, see [compatibility](https://nf-co.re/sarek/latest/docs/usage#which-variant-calling-tool-is-implemented-for-which-data-type)): + - `ASCAT` + - `CNVkit` + - `Control-FREEC` + - `DeepVariant` + - `freebayes` + - `GATK HaplotypeCaller` + - `GATK Mutect2` + - `indexcov` + - `Lofreq` + - `Manta` + - `mpileup` + - `MSIsensor2` + - `MSIsensor-pro` + - `MuSE` + - `Sentieon Haplotyper` + - `Strelka` + - `TIDDIT` +- Post-variant calling options, one of: + - Filtering (`bcftools view` (default: filter by `PASS,.`)), normalisation (`bcftools norm`) and consensus calling (`bcftools isec`, default: called by at least 2 tools `-n+2`) on all vcfs and/or `bcftools concat` for germline vcfs + - `Varlociraptor` for all vcfs +- Variant filtering and annotation (`SnpEff`, 
`Ensembl VEP`, `BCFtools annotate`, `SnpSift`) +- Summarise and represent QC (`MultiQC`)

- +

-## Quick Start +## Usage + +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. -1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`) +First, prepare a samplesheet with your input data that looks as follows: -2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_ +`samplesheet.csv`: -3. Download the pipeline and test it on a minimal dataset with a single command: +```csv +patient,sample,lane,fastq_1,fastq_2 +ID1,S1,L002,ID1_S1_L002_R1_001.fastq.gz,ID1_S1_L002_R2_001.fastq.gz +``` - ```console - nextflow run nf-core/sarek -profile test,YOURPROFILE - ``` +Each row represents a pair of fastq files (paired end). - Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. +Now, you can run the pipeline using: - > * The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`.
- > * Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. - > * If you are using `singularity` and are persistently observing issues downloading Singularity images directly due to timeout or network issues, then you can use the `--singularity_pull_docker_container` parameter to pull and convert the Docker image instead. Alternatively, you can use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. - > * If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs. +```bash +nextflow run nf-core/sarek \ + -profile <docker/singularity/.../institute> \ + --input samplesheet.csv \ + --outdir <OUTDIR> +``` -4. Start running your own analysis! +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).
- ```bash - nextflow run nf-core/sarek -profile --input sample.csv --genome GRCh38 - ``` +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/sarek/usage) and the [parameter documentation](https://nf-co.re/sarek/parameters). -See [usage docs](https://nf-co.re/sarek/usage) for all of the available options when running the pipeline. +## Pipeline output -## Documentation +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/sarek/results) tab on the nf-core website pipeline page. +For more details about the output files and reports, please refer to the +[output documentation](https://nf-co.re/sarek/output). -The nf-core/sarek pipeline comes with documentation about the pipeline [usage](https://nf-co.re/sarek/usage), [parameters](https://nf-co.re/sarek/parameters) and [output](https://nf-co.re/sarek/output). +## Benchmarking + +On each release, the pipeline is run on 3 full size tests: + +- `test_full` runs tumor-normal data for one patient from the SEQ2C consortium +- `test_full_germline` runs a WGS 30X Genome-in-a-Bottle(NA12878) dataset +- `test_full_germline_ncbench_agilent` runs two WES samples with 75M and 200M reads (data available [here](https://github.com/ncbench/ncbench-workflow#contributing-callsets)). The results are uploaded to Zenodo, evaluated against a truth dataset, and results are made available via the [NCBench dashboard](https://ncbench.github.io/report/report.html#). ## Credits -Sarek was originally written by Maxime Garcia, Szilveszter Juhos at the [National Genomics Infastructure](https://ngisweden.scilifelab.se) and [National Bioinformatics Infastructure Sweden](https://nbis.se) which are both platforms at [SciLifeLab](https://scilifelab.se), with the support of [The Swedish Childhood Tumor Biobank (Barntumörbanken)](https://ki.se/forskning/barntumorbanken). 
-[QBiC](https://www.qbic.uni-tuebingen.de/) later joined and helped with further development. +Sarek was originally written by Maxime U Garcia and Szilveszter Juhos at the [National Genomics Infrastructure](https://ngisweden.scilifelab.se) and [National Bioinformatics Infrastructure Sweden](https://nbis.se) which are both platforms at [SciLifeLab](https://scilifelab.se), with the support of [The Swedish Childhood Tumor Biobank (Barntumörbanken)](https://ki.se/forskning/barntumorbanken). +Friederike Hanssen and Gisela Gabernet at [QBiC](https://www.qbic.uni-tuebingen.de/) later joined and helped with further development. + +The Nextflow DSL2 conversion of the pipeline was led by Friederike Hanssen and Maxime U Garcia. -Main authors: +Maintenance is now led by Friederike Hanssen and Maxime U Garcia (now at [Seqera](https://seqera.io)). -* [Gisela Gabernet](https://github.com/ggabernet) -* [Maxime Garcia](https://github.com/maxulysse) -* [Friederike Hanssen](https://github.com/FriederikeHanssen) -* [Szilveszter Juhos](https://github.com/szilvajuhos) +Main developers: + +- [Maxime U Garcia](https://github.com/maxulysse) +- [Friederike Hanssen](https://github.com/FriederikeHanssen) We thank the following people for their extensive assistance in the development of this pipeline: -* [Abhinav Sharma](https://github.com/abhi18av) -* [Adrian Lärkeryd](https://github.com/adrlar) -* [Alexander Peltzer](https://github.com/apeltzer) -* [Chela James](https://github.com/chelauk) -* [David Mas-Ponte](https://github.com/davidmasp) -* [Francesco L](https://github.com/nibscles) -* [Harshil Patel](https://github.com/drpatelh) -* [James A.
Fellows Yates](https://github.com/jfy133) -* [Jesper Eisfeldt](https://github.com/J35P312) -* [Johannes Alneberg](https://github.com/alneberg) -* [José Fernández Navarro](https://github.com/jfnavarro) -* [Lucia Conde](https://github.com/lconde-ucl) -* [Malin Larsson](https://github.com/malinlarsson) -* [Marcel Martin](https://github.com/marcelm) -* [Nilesh Tawari](https://github.com/nilesh-tawari) -* [Olga Botvinnik](https://github.com/olgabot) -* [Paul Cantalupo](https://github.com/pcantalupo) -* [Phil Ewels](https://github.com/ewels) -* [Sabrina Krakau](https://github.com/skrakau) -* [Sebastian-D](https://github.com/Sebastian-D) -* [Tobias Koch](https://github.com/KochTobi) -* [Winni Kretzschmar](https://github.com/winni2k) -* [arontommi](https://github.com/arontommi) -* [bjornnystedt](https://github.com/bjornnystedt) -* [cgpu](https://github.com/cgpu) -* [gulfshores](https://github.com/gulfshores) -* [pallolason](https://github.com/pallolason) -* [silviamorins](https://github.com/silviamorins) +- [Abhinav Sharma](https://github.com/abhi18av) +- [Adam Talbot](https://github.com/adamrtalbot) +- [Adrian Lärkeryd](https://github.com/adrlar) +- [Àitor Olivares](https://github.com/AitorPeseta) +- [Alexander Peltzer](https://github.com/apeltzer) +- [Alison Meynert](https://github.com/ameynert) +- [Anders Sune Pedersen](https://github.com/asp8200) +- [arontommi](https://github.com/arontommi) +- [BarryDigby](https://github.com/BarryDigby) +- [Bekir Ergüner](https://github.com/berguner) +- [bjornnystedt](https://github.com/bjornnystedt) +- [cgpu](https://github.com/cgpu) +- [Chela James](https://github.com/chelauk) +- [David Mas-Ponte](https://github.com/davidmasp) +- [Edmund Miller](https://github.com/edmundmiller) +- [Famke Bäuerle](https://github.com/famosab) +- [Francesco Lescai](https://github.com/lescai) +- [Francisco Martínez](https://github.com/nevinwu) +- [Gavin Mackenzie](https://github.com/GCJMackenzie) +- [Gisela Gabernet](https://github.com/ggabernet) +- 
[Grant Neilson](https://github.com/grantn5) +- [gulfshores](https://github.com/gulfshores) +- [Harshil Patel](https://github.com/drpatelh) +- [Hongwei Ye](https://github.com/YeHW) +- [James A. Fellows Yates](https://github.com/jfy133) +- [Jesper Eisfeldt](https://github.com/J35P312) +- [Johannes Alneberg](https://github.com/alneberg) +- [Jonas Kjellin](https://github.com/kjellinjonas) +- [José Fernández Navarro](https://github.com/jfnavarro) +- [Júlia Mir Pedrol](https://github.com/mirpedrol) +- [Ken Brewer](https://github.com/kenibrewer) +- [Lasse Westergaard Folkersen](https://github.com/lassefolkersen) +- [Lucia Conde](https://github.com/lconde-ucl) +- [Louis Le Nézet](https://github.com/LouisLeNezet) +- [Malin Larsson](https://github.com/malinlarsson) +- [Marcel Martin](https://github.com/marcelm) +- [Nick Smith](https://github.com/nickhsmith) +- [Nicolas Schcolnicov](https://github.com/nschcolnicov) +- [Nilesh Tawari](https://github.com/nilesh-tawari) +- [Nils Homer](https://github.com/nh13) +- [Olga Botvinnik](https://github.com/olgabot) +- [Oskar Wacker](https://github.com/WackerO) +- [pallolason](https://github.com/pallolason) +- [Paul Cantalupo](https://github.com/pcantalupo) +- [Phil Ewels](https://github.com/ewels) +- [Pierre Lindenbaum](https://github.com/lindenb) +- [Sabrina Krakau](https://github.com/skrakau) +- [Sam Minot](https://github.com/sminot) +- [Sebastian-D](https://github.com/Sebastian-D) +- [Silvia Morini](https://github.com/silviamorins) +- [Simon Pearce](https://github.com/SPPearce) +- [Solenne Correard](https://github.com/scorreard) +- [Susanne Jodoin](https://github.com/SusiJo) +- [Szilveszter Juhos](https://github.com/szilvajuhos) +- [Tobias Koch](https://github.com/KochTobi) +- [Winni Kretzschmar](https://github.com/winni2k) +- [Patricie Skaláková](https://github.com/Patricie34) ## Acknowledgements -[![Barntumörbanken](docs/images/BTB_logo.png)](https://ki.se/forskning/barntumorbanken) | 
[![SciLifeLab](docs/images/SciLifeLab_logo.png)](https://scilifelab.se) -:-:|:-: -[![National Genomics Infrastructure](docs/images/NGI_logo.png)](https://ngisweden.scilifelab.se/) | [![National Bioinformatics Infrastructure Sweden](docs/images/NBIS_logo.png)](https://nbis.se) -[![QBiC](docs/images/QBiC_logo.png)](hhttps://www.qbic.uni-tuebingen.de) | +| [![Barntumörbanken](docs/images/BTB_logo.png)](https://ki.se/forskning/barntumorbanken) | [![SciLifeLab](docs/images/SciLifeLab_logo.png)](https://scilifelab.se) | +| :-----------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------: | +| [![National Genomics Infrastructure](docs/images/NGI_logo.png)](https://ngisweden.scilifelab.se/) | [![National Bioinformatics Infrastructure Sweden](docs/images/NBIS_logo.png)](https://nbis.se) | +| [![QBiC](docs/images/QBiC_logo.png)](https://www.qbic.uni-tuebingen.de) | [![GHGA](docs/images/GHGA_logo.png)](https://www.ghga.de/) | +| [![DNGC](docs/images/DNGC_logo.png)](https://eng.ngc.dk/) | | ## Contributions & Support If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). 
-For further information or help, don't hesitate to get in touch on the [Slack `#sarek` channel](https://nfcore.slack.com/channels/sarek) (you can join with [this invite](https://nf-co.re/join/slack)), or contact us: [Gisela Gabernet](mailto:gisela.gabernet@qbic.uni-tuebingen.de?subject=[GitHub]%20nf-core/sarek), [Maxime Garcia](mailto:maxime.garcia@scilifelab.se?subject=[GitHub]%20nf-core/sarek), [Friederike Hanssen](mailto:friederike.hanssen@qbic.uni-tuebingen.de?subject=[GitHub]%20nf-core/sarek), [Szilvester Juhos](mailto:szilveszter.juhos@scilifelab.se?subject=[GitHub]%20nf-core/sarek) +For further information or help, don't hesitate to get in touch on the [Slack `#sarek` channel](https://nfcore.slack.com/channels/sarek) (you can join with [this invite](https://nf-co.re/join/slack)), or contact us: [Maxime U Garcia](mailto:maxime.garcia@seqera.io?subject=[GitHub]%20nf-core/sarek), [Friederike Hanssen](mailto:friederike.hanssen@qbic.uni-tuebingen.de?subject=[GitHub]%20nf-core/sarek) ## Citations If you use `nf-core/sarek` for your analysis, please cite the `Sarek` article as follows: -> Garcia M, Juhos S, Larsson M et al. **Sarek: A portable workflow for whole-genome sequencing analysis of germline and somatic variants [version 2; peer review: 2 approved]** *F1000Research* 2020, 9:63 [doi: 10.12688/f1000research.16665.2](http://dx.doi.org/10.12688/f1000research.16665.2). 
-You can cite the sarek zenodo record for a specific version using the following [doi: 10.5281/zenodo.3476426](https://zenodo.org/badge/latestdoi/184289291) +> Friederike Hanssen, Maxime U Garcia, Lasse Folkersen, Anders Sune Pedersen, Francesco Lescai, Susanne Jodoin, Edmund Miller, Oskar Wacker, Nicholas Smith, nf-core community, Gisela Gabernet, Sven Nahnsen **Scalable and efficient DNA sequencing analysis on different compute infrastructures aiding variant discovery** _NAR Genomics and Bioinformatics_ Volume 6, Issue 2, June 2024, lqae031, [doi: 10.1093/nargab/lqae031](https://doi.org/10.1093/nargab/lqae031). + +> Garcia M, Juhos S, Larsson M et al. **Sarek: A portable workflow for whole-genome sequencing analysis of germline and somatic variants [version 2; peer review: 2 approved]** _F1000Research_ 2020, 9:63 [doi: 10.12688/f1000research.16665.2](http://dx.doi.org/10.12688/f1000research.16665.2). + +You can cite the sarek zenodo record for a specific version using the following [doi: 10.5281/zenodo.3476425](https://doi.org/10.5281/zenodo.3476425) -In addition, references of tools and data used in this pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. +An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. You can cite the `nf-core` publication as follows: @@ -148,4 +231,4 @@ You can cite the `nf-core` publication as follows: ## CHANGELOG -* [CHANGELOG](CHANGELOG.md) +- [CHANGELOG](CHANGELOG.md) diff --git a/assets/email_template.html b/assets/email_template.html index 36a9983efd..8340c9a2e4 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -12,7 +12,7 @@ -

nf-core/sarek v${version}

+

nf-core/sarek ${version}

Run Name: $runName

<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index 2be647eac1..18a7e3e722 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,9 +4,8 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/sarek v${version} + nf-core/sarek ${version} ---------------------------------------------------- - Run Name: $runName <% if (success){ diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 0000000000..fec6e66d3f --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,27 @@ +id: "nf-core-sarek-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/sarek Methods Description" +section_href: "https://github.com/nf-core/sarek" +plot_type: "html" +data: | +

Methods

+

Data was processed using nf-core/sarek v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

+

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

+
${workflow.commandLine}
+

${tool_citations}

+

References

+
    +
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820
  • +
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x
  • +
  • Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7
  • +
  • da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192
  • + ${tool_bibliography} +
+
+
Notes:
+
    + ${nodoi_text} +
  • The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!
  • +
  • You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.
  • +
+
diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml deleted file mode 100644 index f390946c77..0000000000 --- a/assets/multiqc_config.yaml +++ /dev/null @@ -1,34 +0,0 @@ -custom_logo: ../../../docs/images/nf-core_sarek_logo.png -custom_logo_url: https://github.com/nf-core/sarek/ -custom_logo_title: 'nf-core/sarek' - -report_comment: > - This report has been generated by the nf-core/sarek - analysis pipeline. For information about how to interpret these results, please see the - documentation. -report_section_order: - software_versions: - order: -1000 - nf-core-sarek-summary: - order: -1001 - -export_plots: true - -top_modules: -- 'fastqc': - name: 'FastQC' - path_filters_exclude: - - '*trimmed_fastqc*' -- 'cutadapt' -- 'fastqc': - name: 'FastQC after trimming' - info: 'FastQC after applying TrimGalore.' - path_filters: - - '*trimmed_fastqc*' -- 'picard' -- 'gatk' -- 'samtools' -- 'qualimap' -- 'bcftools' -- 'vcftools' -- 'snpeff' diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml new file mode 100644 index 0000000000..1ab45319b9 --- /dev/null +++ b/assets/multiqc_config.yml @@ -0,0 +1,107 @@ +custom_logo: "nf-core-sarek_logo_light.png" +custom_logo_url: https://github.com/nf-core/sarek/ +custom_logo_title: "nf-core/sarek" + +report_comment: > + This report has been generated by the nf-core/sarek analysis pipeline. For information about how to interpret these results, please see the documentation. 
+report_section_order: + "nf-core-sarek-methods-description": + order: -1000 + software_versions: + order: -1001 + "nf-core-sarek-summary": + order: -1002 + +export_plots: true +export_plots_timeout: 660 + +disable_version_detection: true + +# Run only these modules +run_modules: + - custom_content + - fastqc + - fastp + - bbmap + - picard + - samtools + - mosdepth + - gatk + - bcftools + - vcftools + - snpeff + - vep + +module_order: + - fastqc: + name: "FastQC (raw)" + path_filters_exclude: + - "*_val_*.zip" + - fastp: + name: "FastP (Read preprocessing)" + - bbmap: + name: "BBsplit (Reference genome binning)" + - picard: + name: "GATK4 MarkDuplicates" + info: " metrics generated either by GATK4 MarkDuplicates or EstimateLibraryComplexity (with --use_gatk_spark)." + - samtools: + name: "Samtools Flagstat" + - mosdepth: + name: "Mosdepth" + - gatk: + name: "GATK4 BQSR" + - bcftools: + name: "Bcftools" + - vcftools: + name: "Vcftools" + - snpeff: + name: "SNPeff" + - vep: + name: "VEP" + +extra_fn_clean_exts: + - "_val" + - type: regex_keep + pattern: "^.*.(md|recal).mosdepth.(global|region).dist" + module: mosdepth + +sample_names_replace_regex: true +sample_names_replace: + "\\.[0-9]{4}$": ".md" # should match ".0001" but only at the end of strings for module Markduplicates/EstimateLibraryComplexity + module: picard + +custom_data: + dedup_metrics: + id: "dedup_metrics" + section_name: "Sentieon Dedup Metrics" + plot_type: "table" + pconfig: + id: "dedup_metrics" + namespace: "Sentieon Dedup Metrics" + headers: + LIBRARY: + description: "LIBRARY" + UNPAIRED_READS_EXAMINED: + description: "UNPAIRED_READS_EXAMINE" + READ_PAIRS_EXAMINED: + description: "READ_PAIRS_EXAMINED" + SECONDARY_OR_SUPPLEMENTARY_RDS: + description: "SECONDARY_OR_SUPPLEMENTARY_RDS" + UNMAPPED_READS: + description: "UNMAPPED_READS" + UNPAIRED_READ_DUPLICATES: + description: "UNPAIRED_READ_DUPLICATES" + READ_PAIR_DUPLICATES: + description: "READ_PAIR_DUPLICATES" + READ_PAIR_OPTICAL_DUPLICATES: + 
description: "READ_PAIR_OPTICAL_DUPLICATES" + PERCENT_DUPLICATION: + description: "PERCENT_DUPLICATION" + ESTIMATED_LIBRARY_SIZE: + description: "ESTIMATED_LIBRARY_SIZE" +sp: + snpeff: + contents: "SnpEff_version" + max_filesize: 5000000 + dedup_metrics: + fn: "*.metrics.multiqc.tsv" diff --git a/assets/nf-core-sarek_logo_light.png b/assets/nf-core-sarek_logo_light.png index 208d16951e..61aa1c81d5 100644 Binary files a/assets/nf-core-sarek_logo_light.png and b/assets/nf-core-sarek_logo_light.png differ diff --git a/assets/schema_input.json b/assets/schema_input.json index 05a1347073..3c910dc055 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/sarek/master/assets/schema_input.json", "title": "nf-core/sarek pipeline - params.input schema", "description": "Schema for the file provided with params.input", @@ -10,161 +10,131 @@ "patient": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "ID name must be provided and cannot contain spaces" + "errorMessage": "Patient ID must be provided, cannot contain spaces and must be a string value", + "meta": ["patient"] }, "sample": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" - }, - "gender": { - "errorMessage": "Gender cannot contain spaces", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "errorMessage": "Sample ID must be provided, cannot contain spaces and must be a string value", + "meta": ["sample"] + }, + "sex": { + "errorMessage": "Sex must be one of 'XX', 'XY', or 'NA'", + "meta": ["sex"], + "enum": ["XX", "XY", "NA"], + "default": "NA", + "type": "string" }, "status": { - "errorMessage": "Status can only be 0 or 1", - "anyOf": [ - { - "type": "string", - "pattern": "^(0|1)*$" 
- }, - { - "type": "string", - "maxLength": 0 - } - ] + "type": "integer", + "errorMessage": "Status can only be 0 (normal) or 1 (tumor). Defaults to 0, if none is supplied.", + "meta": ["status"], + "default": 0, + "enum": [0, 1] }, "lane": { - "errorMessage": "Lane cannot contain spaces", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+$" - }, - { - "type": "string", - "maxLength": 0 - } - ] - + "anyOf": [{ "type": "integer" }, { "type": "string" }], + "pattern": "^\\S+$", + "description": "Lane identifier used to distinguish between different sequencing runs of the same sample.", + "errorMessage": "Lane identifier cannot contain spaces", + "meta": ["lane"] }, "fastq_1": { - "errorMessage": "FastQ file for reads 1 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "spring_1": { + "errorMessage": "Gzipped and spring-compressed FastQ file for reads 1 cannot contain spaces, has to be a string, has to exist and must have extension '.fq.gz.spring' or '.fastq.gz.spring'", + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz.spring$", + "format": "file-path", + "exists": 
true + }, + "spring_2": { + "errorMessage": "Gzipped and spring-compressed FastQ file for reads 2 cannot contain spaces, has to be a string, has to exist and must have extension '.fq.gz.spring' or '.fastq.gz.spring'", + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz.spring$", + "format": "file-path", + "exists": true }, "table": { - "errorMessage": "Recalibration table cannot contain spaces and must have extension '.table'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.table$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "errorMessage": "Recalibration table cannot contain spaces, has to be a string, has to exist and must have extension '.table'", + "type": "string", + "pattern": "^\\S+\\.table$", + "format": "file-path", + "exists": true }, "cram": { - "errorMessage": "CRAM file cannot contain spaces and must have extension '.cram'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.cram$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "errorMessage": "CRAM file cannot contain spaces, has to be a string, has to exist and must have extension '.cram'", + "type": "string", + "pattern": "^\\S+\\.cram$", + "format": "file-path", + "exists": true }, "crai": { - "errorMessage": "CRAM index file cannot contain spaces and must have extension '.crai'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.crai$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "errorMessage": "CRAM index file cannot contain spaces, has to be a string, has to exist and must have extension '.crai'", + "type": "string", + "pattern": "^\\S+\\.crai$", + "format": "file-path", + "exists": true }, "bam": { - "errorMessage": "BAM file cannot contain spaces and must have extension '.bam'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.bam$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "errorMessage": "BAM file cannot contain spaces, has to be a string, has to exist and must have extension '.bam'", + "type": 
"string", + "pattern": "^\\S+\\.bam$", + "format": "file-path", + "exists": true }, "bai": { - "errorMessage": "BAM index file cannot contain spaces and must have extension '.bai'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.bai$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "errorMessage": "BAM index file cannot contain spaces, has to be a string, has to exist and must have extension '.bai'", + "type": "string", + "pattern": "^\\S+\\.bai$", + "format": "file-path", + "exists": true + }, + "contamination": { + "errorMessage": "(1 - tumor purity) must be a value between 0 and 1, set to 0 for normal samples", + "type": "number", + "exists": true }, "vcf": { - "errorMessage": "VCF file for reads 1 cannot contain spaces and must have extension '.vcf' or '.vcf.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.vcf(\\.gz)?$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "errorMessage": "VCF file for reads 1 cannot contain spaces, has to be a string, has to exist and must have extension '.vcf' or '.vcf.gz'", + "type": "string", + "pattern": "^\\S+\\.vcf(\\.gz)?$", + "format": "file-path", + "exists": true + }, + "variantcaller": { + "type": "string" + } + }, + "anyOf": [ + { + "dependentRequired": { "lane": ["fastq_1"] } + }, + { + "dependentRequired": { "lane": ["spring_1"] } + }, + { + "dependentRequired": { "lane": ["bam"] } } + ], + "dependentRequired": { + "fastq_2": ["fastq_1"], + "spring_2": ["spring_1"] }, - "required": [ - "patient", - "sample" - ] + "required": ["patient", "sample"], + "uniqueEntries": ["lane", "patient", "sample"] } } diff --git a/assets/schema_snpsift_databases.json b/assets/schema_snpsift_databases.json new file mode 100644 index 0000000000..acf5a5708c --- /dev/null +++ b/assets/schema_snpsift_databases.json @@ -0,0 +1,36 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/sarek/master/assets/schema_snpsift_databases.json", + 
"title": "nf-core/sarek - params.snpsift_databases schema", + "description": "Schema for the SnpSift annotation databases CSV samplesheet", + "type": "array", + "items": { + "type": "object", + "properties": { + "vcf": { + "type": "string", + "pattern": "^\\S+\\.vcf(\\.gz)?$", + "errorMessage": "VCF file path must be provided and end with .vcf or .vcf.gz" + }, + "tbi": { + "type": "string", + "pattern": "^\\S+\\.tbi$", + "errorMessage": "Index file must end with .tbi" + }, + "fields": { + "type": "string", + "errorMessage": "Semicolon-separated INFO field names to extract (e.g., DP;AF;MQ)" + }, + "prefix": { + "type": "string", + "errorMessage": "Prefix for annotated field names" + }, + "vardb": { + "type": "string", + "format": "directory-path", + "errorMessage": "Path to pre-built .snpsift.vardb directory" + } + }, + "required": ["vcf"] + } +} diff --git a/assets/snpsift_databases_example.csv b/assets/snpsift_databases_example.csv new file mode 100644 index 0000000000..61a7e1cac8 --- /dev/null +++ b/assets/snpsift_databases_example.csv @@ -0,0 +1,7 @@ +vcf,tbi,fields,prefix,vardb +/path/to/annotations/dbsnp/dbsnp_156.vcf.gz,,,dbSNP_,/path/to/annotations/dbsnp/dbsnp_156.vcf.gz.snpsift.vardb +/path/to/annotations/gnomad/gnomad.genomes.v4.0.sites.vcf.gz,,AF;AC;AN;nhomalt,gnomAD_, +/path/to/annotations/clinvar/clinvar_20240101.vcf.gz,,CLNSIG;CLNDN;CLNREVSTAT,ClinVar_, +/path/to/annotations/1000genomes/ALL.wgs.phase3.sites.vcf.gz,,AF;EAS_AF;EUR_AF;AFR_AF,1000G_, +/path/to/annotations/cosmic/cosmic_v99.vcf.gz,,GENE;STRAND;CDS;AA;CNT,COSMIC_, +/path/to/annotations/custom/my_annotations.vcf.gz,/path/to/custom.vcf.gz.tbi,CUSTOM_FIELD,, diff --git a/assets/varlociraptor_germline.yte.yaml b/assets/varlociraptor_germline.yte.yaml new file mode 100644 index 0000000000..cbef14a5e9 --- /dev/null +++ b/assets/varlociraptor_germline.yte.yaml @@ -0,0 +1,20 @@ +species: + heterozygosity: 0.001 + germline-mutation-rate: 1e-3 + ploidy: + male: + all: 2 + X: 1 + Y: 1 + female: + all: 
2 + X: 2 + Y: 0 + genome-size: 3.5e9 + +samples: + normal: + sex: ?sex_string + +events: + germline: "normal:0.5 | normal:1.0" diff --git a/assets/varlociraptor_somatic.yte.yaml b/assets/varlociraptor_somatic.yte.yaml new file mode 100644 index 0000000000..1db08f41b4 --- /dev/null +++ b/assets/varlociraptor_somatic.yte.yaml @@ -0,0 +1,34 @@ +species: + heterozygosity: 0.001 + germline-mutation-rate: 1e-3 + ploidy: + male: + all: 2 + X: 1 + Y: 1 + female: + all: 2 + X: 2 + Y: 0 + genome-size: 3.5e9 + +samples: + tumor: + sex: ?sex_string + somatic-effective-mutation-rate: 1e-6 + inheritance: + clonal: + from: normal + somatic: true + contamination: + by: normal + fraction: ?contamination + normal: + sex: ?sex_string + somatic-effective-mutation-rate: 1e-10 + +events: + somatic_tumor_high: "normal:0.0 & tumor:[0.1,1.0]" + somatic_tumor_low: "normal:0.0 & tumor:]0.0,0.1[" + somatic_normal: "normal:]0.0,0.5[" + germline: "normal:0.5 | normal:1.0" diff --git a/assets/varlociraptor_tumor_only.yte.yaml b/assets/varlociraptor_tumor_only.yte.yaml new file mode 100644 index 0000000000..04228b386f --- /dev/null +++ b/assets/varlociraptor_tumor_only.yte.yaml @@ -0,0 +1,34 @@ +species: + heterozygosity: 0.001 + germline-mutation-rate: 1e-3 + ploidy: + male: + all: 2 + X: 1 + Y: 1 + female: + all: 2 + X: 2 + Y: 0 + genome-size: 3.5e9 + +samples: + tumor: + sex: ?sex_string + somatic-effective-mutation-rate: 1e-6 + inheritance: + clonal: + from: normal + somatic: true + contamination: + by: normal + fraction: ?contamination + normal: + # TODO: remove "sex: ?sex_string" when updating varlociraptor, will be fixed in coming release + sex: ?sex_string + universe: "0.0 | 0.5 | 1.0" + +events: + somatic_tumor_high: "normal:0.0 & tumor:[0.1,1.0]" + somatic_tumor_low: "normal:0.0 & tumor:]0.0,0.1[" + germline: "normal:0.5 | normal:1.0" diff --git a/bin/concatenateVCFs.sh b/bin/concatenateVCFs.sh deleted file mode 100755 index e655de91f6..0000000000 --- a/bin/concatenateVCFs.sh +++ 
/dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# This script concatenates all VCF files that are in the local directory, -# that were created from different intervals to make a single final VCF - -usage() { echo "Usage: $0 [-i genome_index_file] [-o output.file.no.gz.extension] <-t target.bed> <-c cpus> <-n>" 1>&2; exit 1; } - -while [[ $# -gt 0 ]] -do - key=$1 - case $key in - -i) - genomeIndex=$2 - shift # past argument - shift # past value - ;; - -c) - cpus=$2 - shift # past argument - shift # past value - ;; - -o) - outputFile=$2 - shift # past argument - shift # past value - ;; - -t) - targetBED=$2 - shift # past argument - shift # past value - ;; - -n) - noInt=1 - shift # past argument - ;; - *) - usage - shift # past argument - ;; - esac -done - -if [ -z ${genomeIndex} ]; then echo "Missing index file "; usage; fi -if [ -z ${cpus} ]; then echo "No CPUs defined: setting to 1"; cpus=1; fi -if [ -z ${outputFile} ]; then echo "Missing output file name"; usage; fi - -if [ -z ${noInt+x} ] -then - # First make a header from one of the VCF - # Remove interval information from the GATK command-line, but leave the rest - FIRSTVCF=$(set +o pipefail; ls *.vcf | head -n 1) - sed -n '/^[^#]/q;p' $FIRSTVCF | \ - awk '!/GATKCommandLine/{print}/GATKCommandLine/{for(i=1;i<=NF;i++){if($i!~/intervals=/ && $i !~ /out=/){printf("%s ",$i)}}printf("\n")}' \ - > header - - # Get list of contigs from the FASTA index (.fai) - # ##contig header in the VCF cannot be used as it is optional (FreeBayes does not save it, for example) - - CONTIGS=($(cut -f1 ${genomeIndex})) - - #Concatenate VCFs in the correct order - ( - cat header - - for chr in "${CONTIGS[@]}"; do - # Skip if globbing would not match any file to avoid errors such as - # "ls: cannot access chr3_*.vcf: No such file or directory" when chr3 - # was not processed. - pattern="*_${chr}_*.vcf" - if ! 
compgen -G "${pattern}" > /dev/null ; then continue; fi - - # ls -v sorts by numeric value ("version"), which means that chr1_100_ - # is sorted *after* chr1_99_. - for vcf in $(ls -v ${pattern}); do - # Determine length of header. - # The 'q' command makes sed exit when it sees the first non-header - # line, which avoids reading in the entire file. - L=$(sed -n '/^[^#]/q;p' ${vcf} | wc -l) - - # Then print all non-header lines. Since tail is very fast (nearly as - # fast as cat), this is way more efficient than using a single sed, - # awk or grep command. - tail -n +$((L+1)) ${vcf} - done - done - ) | bgzip -@${cpus} > rawcalls.unsorted.vcf.gz -else - VCF=$(ls no_intervals*.vcf) - cp $VCF rawcalls.unsorted.vcf - bgzip -@${cpus} rawcalls.unsorted.vcf -fi - -bcftools sort rawcalls.unsorted.vcf.gz | bgzip > rawcalls.vcf.gz -tabix -p vcf rawcalls.vcf.gz - -set +u - -# Now we have the concatenated VCF file, check for WES/panel targets, and generate a subset if there is a BED provided -if [ ! -z ${targetBED+x} ]; then - echo "Target is $targetBED - Selecting subset..." - bcftools isec --targets-file ${targetBED} rawcalls.vcf.gz | bgzip -@${cpus} > ${outputFile}.gz - tabix ${outputFile}.gz -else - # Rename the raw calls as WGS results - for f in rawcalls.vcf*; do mv -v $f ${outputFile}${f#rawcalls.vcf}; done -fi diff --git a/bin/convertAlleleCounts.r b/bin/convertAlleleCounts.r deleted file mode 100755 index a3b50384a9..0000000000 --- a/bin/convertAlleleCounts.r +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env Rscript - -# Description: -# R-script for converting output from AlleleCount to BAF and LogR values. 
-# -# -# Input: -# AlleleCounter output file for tumor and normal samples -# The first line should contain a header describing the data -# The following columns and headers should be present: -# CHR POS Count_A Count_C Count_G Count_T Good_depth -# -# Output: -# BAF and LogR tables (tab delimited text files) -################################################################################ - -##First read in the arguments listed at the command line -args = commandArgs(trailingOnly=TRUE) - -## args is now a list of character vectors -## First check to see if arguments are passed. -if(length(args)<5){ - stop("No input files supplied\n\nUsage:\nRscript convertAlleleCounts.r tumorid tumorac normalid normalac gender\nWhere:\ntumorid - id of tumor sample\ntumorac - output from AlleleCount for the tumor\nnormalid - id of normal sample\nnormalac - output from AlleleCount for the normal\ngender - XX or XY\n\n") -} else{ - tumorid = args[1] - tumorac = args[2] - normalid = args[3] - normalac = args[4] - gender = args[5] -} - - - -tumorcounts = read.table(tumorac, header=F, sep="\t") -normalcounts = read.table(normalac, header=F, sep="\t") - -SNPpos = matrix(nrow = dim(normalcounts)[1],ncol = 2) - -rownames(SNPpos) = paste("snp",1:dim(SNPpos)[1],sep="") -colnames(SNPpos) = c("Chr","Position") -SNPpos[,1] = as.vector(normalcounts[,1]) -SNPpos[,2] = normalcounts[,2] - - -Tumor_BAF = matrix(nrow = dim(normalcounts)[1],ncol = 1) -rownames(Tumor_BAF) = rownames(SNPpos) -colnames(Tumor_BAF) = c(tumorid) -acgt = tumorcounts[,c(3:6)] -acgts = t(apply(acgt,1,sort)) -Tumor_BAF[,1] = acgts[,4]/(acgts[,3]+acgts[,4]) -Tumor_BAF[,1] = ifelse(runif(length(Tumor_BAF[,1]))<0.5,Tumor_BAF[,1],1-Tumor_BAF[,1]) -Tumor_BAF[is.nan(Tumor_BAF)]=NA - -Germline_BAF = matrix(nrow = dim(normalcounts)[1],ncol = 1) -rownames(Germline_BAF) = rownames(SNPpos) -colnames(Germline_BAF) = c(normalid) -acgt = normalcounts[,c(3:6)] -acgts = t(apply(acgt,1,sort)) -Germline_BAF[,1] = acgts[,4]/(acgts[,3]+acgts[,4]) 
-Germline_BAF[,1] = ifelse(runif(length(Germline_BAF[,1]))<0.5,Germline_BAF[,1],1-Germline_BAF[,1]) -Germline_BAF[is.nan(Germline_BAF)]=NA - - -Tumor_LogR = matrix(nrow = dim(normalcounts)[1],ncol = 1) -Germline_LogR = matrix(nrow = dim(normalcounts)[1],ncol = 1) -rownames(Tumor_LogR) = rownames(SNPpos) -colnames(Tumor_LogR) = c(tumorid) -rownames(Germline_LogR) = rownames(SNPpos) -colnames(Germline_LogR) = c(normalid) -Tumor_LogR[,1] = log(tumorcounts[,7]/normalcounts[,7],2) -Germline_LogR[,1] = 0 - -Tumor_LogR[! is.finite(Tumor_LogR)]=NA # infinite = coverage in normal only, NaN = no coverage in tumour or normal - -if(gender=="XY") { - Tumor_LogR[SNPpos[,1]=="X",1] = Tumor_LogR[SNPpos[,1]=="X",1]-1 - Germline_LogR[SNPpos[,1]=="X",1] = Germline_LogR[SNPpos[,1]=="X",1]-1 - Tumor_LogR[SNPpos[,1]=="Y",1] = Tumor_LogR[SNPpos[,1]=="Y",1]-1 - Germline_LogR[SNPpos[,1]=="Y",1] = Germline_LogR[SNPpos[,1]=="Y",1]-1 -} - -Tumor_LogR[,1] = Tumor_LogR[,1] - median(Tumor_LogR[,1],na.rm=T) - -# set regions with 0 reads in tumor and normal to a LogR of 0. 
-Tumor_LogR[is.na(Tumor_LogR[,1]),1] = 0 - -# limit the number of digits: -Tumor_LogR = round(Tumor_LogR,4) -Tumor_BAF = round(Tumor_BAF,4) -Germline_LogR = round(Germline_LogR,4) -Germline_BAF = round(Germline_BAF,4) - -# write output to files -write.table(cbind(SNPpos,Tumor_LogR),paste(tumorid,".LogR",sep=""),sep="\t",row.names=T,col.names=NA,quote=F) -write.table(cbind(SNPpos,Tumor_BAF),paste(tumorid,".BAF",sep=""),sep="\t",row.names=T,col.names=NA,quote=F) -write.table(cbind(SNPpos,Germline_LogR),paste(normalid,".LogR",sep=""),sep="\t",row.names=T,col.names=NA,quote=F) -write.table(cbind(SNPpos,Germline_BAF),paste(normalid,".BAF",sep=""),sep="\t",row.names=T,col.names=NA,quote=F) diff --git a/bin/license_message.py b/bin/license_message.py new file mode 100644 index 0000000000..9ba2abba36 --- /dev/null +++ b/bin/license_message.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 + +######################################### +# Author: [DonFreed](https://github.com/DonFreed) +# File: license_message.py +# Source: https://github.com/DonFreed/docker-actions-test/blob/main/.github/scripts/license_message.py +# Source+commit: https://github.com/DonFreed/docker-actions-test/blob/aa1051a9f53b3a1e801953748d062cad74dca9a9/.github/scripts/license_message.py +# Download Date: 2023-07-04, commit: aa1051a +# This source code is licensed under the BSD 2-Clause license +######################################### + +""" +Functions for generating and sending license messages +""" + +# Modified from - https://stackoverflow.com/a/59835994 + +import argparse +import base64 +import calendar +import re +import secrets +import sys + +from cryptography.hazmat.primitives.ciphers.aead import AESGCM +from datetime import datetime as dt + +MESSAGE_TIMEOUT = 60 * 60 * 24 # Messages are valid for 1 day +NONCE_BYTES = 12 + + +class DecryptionTimeout(Exception): + # Decrypting a message that is too old + pass + + +def generate_key(): + key = secrets.token_bytes(32) + return key + + +def 
handle_generate_key(args): + key = generate_key() + key_b64 = base64.b64encode(key) + print(key_b64.decode("utf-8"), file=args.outfile) + + +def encrypt_message(key, message): + nonce = secrets.token_bytes(NONCE_BYTES) + timestamp = calendar.timegm(dt.now().astimezone().utctimetuple()) # real UTC epoch seconds; a naive now() would be read as if it were already UTC + data = timestamp.to_bytes(10, byteorder="big") + b"__" + message + ciphertext = nonce + AESGCM(key).encrypt(nonce, data, b"") + return ciphertext + + +def handle_encrypt_message(args): + key = base64.b64decode(args.key.encode("utf-8")) + message = args.message.encode("utf-8") + ciphertext = encrypt_message(key, message) + ciphertext_b64 = base64.b64encode(ciphertext) + print(ciphertext_b64.decode("utf-8"), file=args.outfile) + + +def decrypt_message(key, ciphertext, timeout=MESSAGE_TIMEOUT): + nonce, ciphertext = ciphertext[:NONCE_BYTES], ciphertext[NONCE_BYTES:] + message = AESGCM(key).decrypt(nonce, ciphertext, b"") + + msg_timestamp, message = message[:10], message[12:] # fixed layout written by encrypt_message: 10-byte timestamp, 2-byte b"__" separator, payload; searching for b"__" mis-split when the timestamp bytes contained 0x5F + msg_timestamp = int.from_bytes(msg_timestamp, byteorder="big") + timestamp = calendar.timegm(dt.now().astimezone().utctimetuple()) # same UTC clock as encrypt_message, independent of the host time zone + if (timestamp - msg_timestamp) > timeout: + raise DecryptionTimeout("The message has an expired timeout") + return message.decode("utf-8") + + +def handle_decrypt_message(args): + key = base64.b64decode(args.key.encode("utf-8")) + ciphertext = base64.b64decode(args.message.encode("utf-8")) + message = decrypt_message(key, ciphertext, timeout=args.timeout) + print(str(message), file=args.outfile) + + +def parse_args(argv=None): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--outfile", default=sys.stdout, type=argparse.FileType("w"), help="The output file") + + subparsers = parser.add_subparsers(help="Available sub-commands") + + gen_parser = subparsers.add_parser("generate_key", help="Generate a random key string") + gen_parser.set_defaults(func=handle_generate_key) + + encrypt_parser = subparsers.add_parser("encrypt", help="Encrypt a message") +
encrypt_parser.add_argument("--key", required=True, help="The encryption key") + encrypt_parser.add_argument("--message", required=True, help="Message to encrypt") + encrypt_parser.set_defaults(func=handle_encrypt_message) + + decrypt_parser = subparsers.add_parser("decrypt", help="Decrypt a message") + decrypt_parser.add_argument("--key", required=True, help="The encryption key") + decrypt_parser.add_argument("--message", required=True, help="Message to decrypt") + decrypt_parser.add_argument( + "--timeout", + default=MESSAGE_TIMEOUT, + type=int, + help="A message timeout. Decryption will fail for older messages", + ) + decrypt_parser.set_defaults(func=handle_decrypt_message) + + return parser.parse_args(argv) + + +if __name__ == "__main__": + args = parse_args() + args.func(args) diff --git a/bin/run_ascat.r b/bin/run_ascat.r deleted file mode 100755 index 6b20027bf6..0000000000 --- a/bin/run_ascat.r +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env Rscript -library("optparse") -option_list = list( - make_option("--tumorbaf", type="character", default=NULL, - help="tumor BAF file", metavar="character"), - make_option("--tumorlogr", type="character", default=NULL, - help="tumor LogR file", metavar="character"), - make_option("--normalbaf", type="character", default=NULL, - help="normal BAF file", metavar="character"), - make_option("--normallogr", type="character", default=NULL, - help="normal LogR file", metavar="character"), - make_option("--tumorname", type="character", default=NULL, - help="name of tumor sample file", metavar="character"), - make_option("--basedir", type="character", default=NULL, - help="main Sarek directory for sample", metavar="character"), - make_option("--gcfile", type="character", default=NULL, - help="GC correction file", metavar="character"), - make_option("--gender", type="character", default=NULL, - help="gender on format XX or XY", metavar="character"), - make_option("--purity", type="double", default=NULL, - help="override Ascat
purity parameter (rho_manual) ", metavar="character"), - make_option("--ploidy", type="double", default=NULL, - help="override Ascat ploidy parameter (psi_manual)", metavar="character")) - -opt_parser = OptionParser(option_list=option_list) -opt = parse_args(opt_parser) - -if(is.null(opt$tumorbaf) || is.null(opt$tumorlogr) || is.null(opt$normalbaf) || is.null(opt$normallogr) || is.null(opt$tumorname) || is.null(opt$basedir) || is.null(opt$gcfile) || is.null(opt$gender)) { - print_help(opt_parser) - stop("At least one of the required arguments missing.", call.=FALSE) -} - -library(ASCAT) - -if(!require(RColorBrewer)){ - source("http://bioconductor.org/biocLite.R") - biocLite("RColorBrewer", suppressUpdates=TRUE, lib="$opt$basedir/scripts") - library(RColorBrewer) -} -options(bitmapType='cairo') - - -#Load the data -#ascat.bc <- ascat.loadData(Tumor_LogR_file=opt$tumorlogr, Tumor_BAF_file=opt$tumorbaf, Germline_LogR_file=opt$normallogr, Germline_BAF_file=opt$normalbaf, chrs = c(1:22,"X","Y"), opt$gender = opt$gender, sexchromosomes = c("X", "Y")) - -if(opt$gender=="XY"){ - ascat.bc = ascat.loadData(Tumor_LogR_file = opt$tumorlogr, Tumor_BAF_file = opt$tumorbaf, Germline_LogR_file = opt$normallogr, Germline_BAF_file = opt$normalbaf, gender = opt$gender, chrs = c(1:22,"X","Y"), sexchromosomes = c("X","Y")) -} else { - ascat.bc = ascat.loadData(Tumor_LogR_file = opt$tumorlogr, Tumor_BAF_file = opt$tumorbaf, Germline_LogR_file = opt$normallogr, Germline_BAF_file = opt$normalbaf, gender = opt$gender, chrs = c(1:22,"X"), sexchromosomes = c("X")) - -} - -#GC wave correction -ascat.bc = ascat.GCcorrect(ascat.bc, opt$gcfile) - -#Plot the raw data -ascat.plotRawData(ascat.bc) - -#Segment the data -ascat.bc <- ascat.aspcf(ascat.bc) - -#Plot the segmented data -ascat.plotSegmentedData(ascat.bc) - -#Run ASCAT to fit every tumor to a model, inferring ploidy, normal cell contamination, and discrete copy numbers -#If psi and rho are manually set: -if (!is.null(opt$purity) && 
!is.null(opt$ploidy)){ - ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=opt$purity, psi_manual=opt$ploidy) -} else if(!is.null(opt$purity) && is.null(opt$ploidy)){ - ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=opt$purity) -} else if(!is.null(opt$ploidy) && is.null(opt$purity)){ - ascat.output <- ascat.runAscat(ascat.bc, gamma=1, psi_manual=opt$ploidy) -} else { - ascat.output <- ascat.runAscat(ascat.bc, gamma=1) -} - -#Write out segmented regions (including regions with one copy of each allele) -#write.table(ascat.output$segments, file=paste(opt$tumorname, ".segments.txt", sep=""), sep="\t", quote=F, row.names=F) - -#Write out CNVs in bed format -cnvs=ascat.output$segments[2:6] -write.table(cnvs, file=paste(opt$tumorname,".cnvs.txt",sep=""), sep="\t", quote=F, row.names=F, col.names=T) - -#Write out purity and ploidy info -summary <- tryCatch({ - matrix(c(ascat.output$aberrantcellfraction, ascat.output$ploidy), ncol=2, byrow=TRUE)}, error = function(err) { - # error handler picks up where error was generated - print(paste("Could not find optimal solution: ",err)) - return(matrix(c(0,0),nrow=1,ncol=2,byrow = TRUE)) - } -) - -colnames(summary) <- c("AberrantCellFraction","Ploidy") -write.table(summary, file=paste(opt$tumorname,".purityploidy.txt",sep=""), sep="\t", quote=F, row.names=F, col.names=T) diff --git a/conf/base.config b/conf/base.config index ce9e5c254b..4e184eca9c 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,7 +1,7 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nf-core/sarek Nextflow base config file -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A 'blank slate' config file, appropriate for general use on most high performance compute 
environments. Assumes that all software is installed and available on the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. @@ -9,61 +9,103 @@ */ process { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - shell = ['/bin/bash', '-euo', 'pipefail'] - errorStrategy = { task.exitStatus in [143,137,104,134,139, 247] ? 'retry' : 'finish' } + // TODO nf-core: Check the defaults for all processes + cpus = { 1 * task.attempt } + memory = { 6.GB * task.attempt } + time = { 8.h * task.attempt } + + errorStrategy = { task.exitStatus in ((130..145) + 104 + 175) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' // Process-specific resource requirements // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } + withLabel: process_gpu { + ext.use_gpu = { workflow.profile.contains('gpu') } + accelerator = { workflow.profile.contains('gpu') ? 
1 : null } + } + withLabel:process_single { + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 8.h * task.attempt } + } withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 2 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 8.h * task.attempt } } withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 16.h * task.attempt } } withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + cpus = { 12 * task.attempt } + memory = { 72.GB * task.attempt } + time = { 32.h * task.attempt } } withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } + time = { 40.h * task.attempt } } withLabel:process_high_memory { - memory = { check_max( 200.GB * task.attempt, 'memory' ) } + memory = { 200.GB * task.attempt } } - withLabel:error_ignore { - errorStrategy = 'ignore' + withName: 'UNZIP.*|UNTAR.*|TABIX.*|BUILD_INTERVALS|CREATE_INTERVALS_BED|VCFTOOLS|BCFTOOLS.*|SAMTOOLS_INDEX' { + cpus = { 1 * task.attempt } + memory = { 1.GB * task.attempt } } - withLabel:error_retry { - errorStrategy = 'retry' - maxRetries = 2 + withName: 'FASTQC'{ + cpus = { 4 * task.attempt } + memory = { 4.GB * task.attempt } + } + withName: 'FASTP'{ + cpus = { 12 * task.attempt } + memory = { 4.GB * task.attempt } + } + withName: 'BWAMEM1_MEM|BWAMEM2_MEM' { + cpus = { 24 * task.attempt } + memory = { 30.GB * task.attempt } + } + withName: 'PARABRICKS_FQ2BAM' { + accelerator = { task.executor in ['awsbatch','google-batch','hq','k8s'] ? 
4 : null } + } + withName:'CNVKIT_BATCH' { + cpus = { 12 * task.attempt } + memory = { 36.GB * task.attempt } + } + withName: 'GATK4_MARKDUPLICATES|GATK4SPARK_MARKDUPLICATES' { + cpus = { 6 * task.attempt } + memory = { 30.GB * task.attempt } + } + withName:'GATK4_APPLYBQSR|GATK4SPARK_APPLYBQSR|GATK4_BASERECALIBRATOR|GATK4SPARK_BASERECALIBRATOR|GATK4_GATHERBQSRREPORTS'{ + cpus = { 2 * task.attempt } + memory = { 4.GB * task.attempt } } - withName:CONCAT_VCF { - // For unknown reasons, CONCAT_VCF sometimes fails with SIGPIPE - // (exit code 141). Rerunning the process will usually work. - errorStrategy = {task.exitStatus == 141 ? 'retry' : 'terminate'} + withName:'MOSDEPTH'{ + cpus = { 4 * task.attempt } + memory = { 4.GB * task.attempt } } - withName:FASTQC { - errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} + withName:'STRELKA.*|MANTA.*' { + cpus = { 10 * task.attempt } + memory = { 8.GB * task.attempt } } - withName:BWAMEM2_MEM { - memory = { check_max( 60.GB * task.attempt, 'memory' ) } - time = { check_max( 48.h * task.attempt, 'time' ) } + withName:'SAMTOOLS_CONVERT'{ + memory = { 4.GB * task.attempt } } - withName:MULTIQC { - errorStrategy = {task.exitStatus == 143 ? 
'retry' : 'ignore'} + withName:'GATK4_MERGEVCFS'{ + cpus = { 2 * task.attempt } + memory = { 4.GB * task.attempt } } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false + withName: 'MULTIQC' { + cpus = { 4 * task.attempt } + memory = { 12.GB * task.attempt } } } diff --git a/conf/genomes.config b/conf/genomes.config deleted file mode 100644 index 7317f16f0f..0000000000 --- a/conf/genomes.config +++ /dev/null @@ -1,36 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for reference genome - * ------------------------------------------------- - * Defines reference genomes, without using iGenome paths - * Can be used by any config that customises the base - * path using $params.genomes_base / --genomes_base - * - * CAREFUL: Some o the files might be reuiqred in the CI tests not yet implemented. They should be gradually moved to the test.config. Until then lets keep this file. - */ - -params { - genomes { - 'minimalGRCh37' { - fasta = "${params.genomes_base}/human_g1k_v37_decoy.small.fasta" - } - 'smallGRCh37' { - dbsnp = "${params.genomes_base}/dbsnp_138.b37.small.vcf.gz" - fasta = "${params.genomes_base}/human_g1k_v37_decoy.small.fasta" - germline_resource = "${params.genomes_base}/gnomAD.r2.1.1.GRCh37.small.PASS.AC.AF.only.vcf.gz" - intervals = "${params.genomes_base}/small.intervals" - known_indels = "${params.genomes_base}/Mills_1000G_gold_standard_and_1000G_phase1.indels.b37.small.vcf.gz" - snpeff_db = 'GRCh37.75' - vep_genome = 'GRCh37' - vep_species = 'homo_sapiens' - vep_cache_version = '104' - } - 'smallerGRCh37' { - fasta = "${params.genomes_base}/human_g1k_v37_decoy.small.fasta" - known_indels = "${params.genomes_base}/dbsnp_138.b37.small.vcf.gz" - } - 'custom' { - fasta = null - } - } -} diff --git a/conf/igenomes.config b/conf/igenomes.config index eda7409f20..b665518af5 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -1,7 +1,7 @@ /* 
-======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Nextflow config file for iGenomes paths -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines reference genomes using iGenome paths. Can be used by any config that customises the base path using: $params.igenomes_base / --igenomes_base @@ -12,62 +12,102 @@ params { // illumina iGenomes reference file paths genomes { 'GATK.GRCh37' { - ac_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/1000G_phase3_20130502_SNP_maf0.3.loci" - ac_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/1000G_phase3_20130502_SNP_maf0.3.loci.gc" - bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/BWAIndex/human_g1k_v37_decoy.fasta.{amb,ann,bwt,pac,sa}" + ascat_alleles = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/G1000_alleles_hg19.zip" + ascat_genome = 'hg19' + ascat_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/G1000_loci_hg19.zip" + ascat_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/GC_G1000_hg19.zip" + ascat_loci_rt = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/RT_G1000_hg19.zip" + bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/BWAIndex/" chr_dir = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/Chromosomes" - chr_length = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/Length/human_g1k_v37_decoy.len" - dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf" - dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.idx" + dbsnp = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz" + dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz.tbi" + dbsnp_vqsr = '--resource:dbsnp,known=false,training=true,truth=false,prior=2.0 dbsnp_138.b37.vcf.gz' dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict" fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta" fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai" - germline_resource = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GermlineResource/gnomAD.r2.1.1.GRCh37.PASS.AC.AF.only.vcf.gz" - germline_resource_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GermlineResource/gnomAD.r2.1.1.GRCh37.PASS.AC.AF.only.vcf.gz.tbi" + germline_resource = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/af-only-gnomad.raw.sites.vcf.gz" + germline_resource_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/af-only-gnomad.raw.sites.vcf.gz.tbi" intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/intervals/wgs_calling_regions_Sarek.list" - known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf" - known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.idx" + known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz" + known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz.tbi" + known_snps_vqsr = '--resource:1000G,known=false,training=true,truth=true,prior=10.0 
1000G_phase1.snps.high_confidence.b37.vcf.gz' + known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz" + known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz.tbi" + known_indels_vqsr = '--resource:1000G,known=false,training=true,truth=true,prior=10.0 1000G_phase1.indels.b37.vcf.gz --resource:mills,known=false,training=true,truth=true,prior=10.0 Mills_and_1000G_gold_standard.indels.b37.vcf.gz' mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/Control-FREEC/out100m2_hg19.gem" - snpeff_db = 'GRCh37.75' - vep_cache_version = '104' + msisensor2_models = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/MSIsensor2/models_hg19_GRCh37/" + msisensorpro_scan = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/MSIsensorPro/human_g1k_v37_decoy.msisensor_scan.list" + ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/NGSCheckMate/SNP_GRCh37_hg19_woChr.bed" + snpeff_db = 'GRCh37.87' + vep_cache_version = '115' vep_genome = 'GRCh37' vep_species = 'homo_sapiens' } 'GATK.GRCh38' { - ac_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci" - ac_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci.gc" - bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/Homo_sapiens_assembly38.fasta.64.{alt,amb,ann,bwt,pac,sa}" - chr_dir = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Chromosomes" - chr_length = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Length/Homo_sapiens_assembly38.len" - dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" - dbsnp_tbi = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" - dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" - fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta" - fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai" - germline_resource = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GermlineResource/gnomAD.r2.1.1.GRCh38.PASS.AC.AF.only.vcf.gz" - germline_resource_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GermlineResource/gnomAD.r2.1.1.GRCh38.PASS.AC.AF.only.vcf.gz.tbi" - intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/intervals/wgs_calling_regions.hg38.bed" - known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" - known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" - mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/Control-FREEC/out100m2_hg38.gem" - snpeff_db = 'GRCh38.99' - vep_cache_version = '104' - vep_genome = 'GRCh38' - vep_species = 'homo_sapiens' + ascat_alleles = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/G1000_alleles_hg38.zip" + ascat_genome = 'hg38' + ascat_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/G1000_loci_hg38.zip" + ascat_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/GC_G1000_hg38.zip" + ascat_loci_rt = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/RT_G1000_hg38.zip" + bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/" + bwamem2 = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAmem2Index/" + cf_chrom_len = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Length/Homo_sapiens_assembly38.len" + chr_dir = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Chromosomes" + dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" + dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" + dbsnp_vqsr = '--resource:dbsnp,known=false,training=true,truth=false,prior=2.0 dbsnp_146.hg38.vcf.gz' + dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" + dragmap = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/dragmap/" + fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta" + fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai" + germline_resource = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/af-only-gnomad.hg38.vcf.gz" + germline_resource_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/af-only-gnomad.hg38.vcf.gz.tbi" + intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/intervals/wgs_calling_regions_noseconds.hg38.bed" + known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" + known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" + known_indels_vqsr = '--resource:gatk,known=false,training=true,truth=true,prior=10.0 Homo_sapiens_assembly38.known_indels.vcf.gz --resource:mills,known=false,training=true,truth=true,prior=10.0 
Mills_and_1000G_gold_standard.indels.hg38.vcf.gz' + known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz" + known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz.tbi" + known_snps_vqsr = '--resource:1000G,known=false,training=true,truth=true,prior=10.0 1000G_omni2.5.hg38.vcf.gz' + mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/Control-FREEC/out100m2_hg38.gem" + msisensor2_models = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/MSIsensor2/models_hg38/" + msisensorpro_scan = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/MSIsensorPro/Homo_sapiens_assembly38.msisensor_scan.list" + ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/NGSCheckMate/SNP_GRCh38_hg38_wChr.bed" + pon = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz" + pon_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi" + sentieon_dnascope_model = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/Sentieon/SentieonDNAscopeModel1.1.model" + snpeff_db = 'GRCh38.99' + vep_cache_version = '115' + vep_genome = 'GRCh38' + vep_species = 'homo_sapiens' } 'Ensembl.GRCh37' { - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/NGSCheckMate/SNP_GRCh37_hg19_woChr.bed" readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" + snpeff_db = 'GRCh37.87' + vep_cache_version = '115' + vep_genome = 'GRCh37' + vep_species = 'homo_sapiens' } 'NCBI.GRCh38' { - bwa
= "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + ngscheckmate_bed ="${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/NGSCheckMate/SNP_GRCh38_hg38_wChr.bed" + snpeff_db = 'GRCh38.99' + vep_cache_version = '115' + vep_genome = 'GRCh38' + vep_species = 'homo_sapiens' + } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" } 'GRCm38' { - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" chr_dir = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Chromosomes" - chr_length = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Length/GRCm38.len" dbsnp = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.snps_all.dbSNP142.vcf.gz" dbsnp_tbi = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.snps_all.dbSNP142.vcf.gz.tbi" dict = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.dict" @@ -84,169 +124,212 @@ params { vep_species = 'mus_musculus' } 'TAIR10' { + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = 
"${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" } 'EB2' { + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" } 'UMD3.1' { + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" + snpeff_db = 'UMD3.1.75' + vep_cache_version = '94' + vep_genome = 'UMD3.1' + vep_species = 'bos_taurus' } 'WBcel235' { - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" snpeff_db = 'WBcel235.99' - vep_cache_version = '104' + vep_cache_version = '115' vep_genome = 'WBcel235' vep_species = 'caenorhabditis_elegans' } 'CanFam3.1' { + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" + snpeff_db = 'CanFam3.1.99' + vep_cache_version = '104' + 
vep_genome = 'CanFam3.1' + vep_species = 'canis_lupus_familiaris' } 'GRCz10' { + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" } 'BDGP6' { + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" } 'EquCab2' { + bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" } 'EB1' { + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" } 'Galgal4' { + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" } 'Gm01' { + bwa = 
"${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" } 'Mmul_1' { + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" } 'IRGSP-1.0' { + bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" } 'CHIMP2.1.4' { + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" } 'Rnor_5.0' { + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" } 'Rnor_6.0' { + bwa = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" } 'R64-1-1' { + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + snpeff_db = 'R64-1-1.99' + vep_cache_version = '115' + vep_genome = 'R64-1-1' + vep_species = 'saccharomyces_cerevisiae' } 'EF2' { + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" } 'Sbi1' { + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" } 'Sscrofa10.2' { + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = 
"${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" } 'AGPv3' { + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" } 'hg38' { + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" + snpeff_db = 'GRCh38.99' + vep_cache_version = '115' + vep_genome = 'GRCh38' + vep_species = 'homo_sapiens' } 'hg19' { + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" + snpeff_db = 'GRCh37.87' + vep_cache_version = '115' + vep_genome = 'GRCh37' + vep_species = 'homo_sapiens' } 'mm10' { + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" + snpeff_db = 'GRCm38.99' + vep_cache_version = '102' + vep_genome = 'GRCm38' + vep_species = 'mus_musculus' } 'bosTau8' { + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" } 'ce10' { + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" } 'canFam3' { + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" } 'danRer10' { + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" } 'dm6' { + bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" } 'equCab2' { + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" } 'galGal4' { + bwa = 
"${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" } 'panTro4' { + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" } 'rn6' { + bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" } 'sacCer3' { + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" } 'susScr3' { + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/genome.fa.{amb,ann,bwt,pac,sa}" readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" } + 'testdata.nf-core.sarek' { + dbsnp = 
"${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" + dbsnp_tbi = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi" + dbsnp_vqsr = '--resource:dbsnp,known=false,training=true,truth=false,prior=2.0 dbsnp_146.hg38.vcf.gz' + dict = "${params.igenomes_base}/genomics/homo_sapiens/genome/genome.dict" + fasta = "${params.igenomes_base}/genomics/homo_sapiens/genome/genome.fasta" + fasta_fai = "${params.igenomes_base}/genomics/homo_sapiens/genome/genome.fasta.fai" + germline_resource = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz" + germline_resource_tbi = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz.tbi" + intervals = "${params.igenomes_base}/genomics/homo_sapiens/genome/genome.interval_list" + known_indels = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz" + known_indels_tbi = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz.tbi" + known_indels_vqsr = '--resource:mills,known=false,training=true,truth=true,prior=10.0 mills_and_1000G.indels.vcf.gz' + ngscheckmate_bed = "${params.igenomes_base}/genomics/homo_sapiens/genome/chr21/germlineresources/SNP_GRCh38_hg38_wChr.bed" + snpeff_db = 'WBcel235.99' + vep_cache_version = '114' + vep_genome = 'WBcel235' + vep_species = 'caenorhabditis_elegans' + } } } diff --git a/conf/igenomes_ignored.config b/conf/igenomes_ignored.config new file mode 100644 index 0000000000..b4034d8243 --- /dev/null +++ b/conf/igenomes_ignored.config @@ -0,0 +1,9 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for iGenomes paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Empty genomes dictionary to use when igenomes is ignored. 
+---------------------------------------------------------------------------------------- +*/ + +params.genomes = [:] diff --git a/conf/modules.config b/conf/modules.config deleted file mode 100644 index e4ef7b0f79..0000000000 --- a/conf/modules.config +++ /dev/null @@ -1,649 +0,0 @@ -/* -======================================================================================== - Config file for defining DSL2 per module options and publishing paths -======================================================================================== - Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.suffix = File name ext.suffix output files. Not available for nf-core modules - ext.prefix = File name prefix for output files. ----------------------------------------------------------------------------------------- -*/ - -process { - - publishDir = [ - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - - withName: CUSTOM_DUMPSOFTWAREVERSIONS { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: 'copy', - pattern: '*_versions.yml' - ] - } -} - -// PREPARE_GENOME -process { - - withName: 'BWAMEM1_INDEX' { - publishDir = [ - path: { "${params.outdir}/reference/bwa" }, - enabled: "${params.save_reference}", - pattern: "bwa", - mode: 'copy' - ] - } - - withName: 'BWAMEM2_INDEX' { - publishDir = [ - path: { "${params.outdir}/reference/bwamem2" }, - enabled: "${params.save_reference}", - pattern: "bwa", - mode: 'copy' - ] - - } - withName: 'CREATE_INTERVALS_BED' { - publishDir = [ - path: { "${params.outdir}/reference/intervals" }, - enabled: "${params.save_reference}", - pattern: "*bed", - mode: 'copy' - ] - } - - withName: 'GATK4_CREATESEQUENCEDICTIONARY' { - publishDir = [ - path: { "${params.outdir}/reference/gatk4" }, - enabled: "${params.save_reference}", - pattern: "*dict", - mode: 'copy' - ] - } - - withName: 'MSISENSORPRO_SCAN' { - publishDir = [ - path: { "${params.outdir}/reference/msi" }, - enabled: "${params.save_reference}", - pattern: "*list", - mode: 'copy' - ] - } - - withName: 'SAMTOOLS_FAIDX' { - publishDir = [ - path: { "${params.outdir}/reference/fai" }, - enabled: "${params.save_reference}", - pattern: "*fai", - mode: 'copy' - ] - } - - withName: 'TABIX_BGZIPTABIX' { - publishDir = [ - path: { "${params.outdir}/reference/target" }, - enabled: "${params.save_reference}", - pattern: "*bed.gz", - mode: 'copy' - ] - ext.prefix = {"${meta.id}.bed"} - - } - - withName: 'TABIX_DBSNP' { - publishDir = [ - path: { "${params.outdir}/reference/dbsnp" }, - enabled: "${params.save_reference}", - pattern: "*vcf.gz.tbi", - mode: 'copy' - ] - } - - withName: 'TABIX_GERMLINE_RESOURCE' { - publishDir = [ - path: { "${params.outdir}/reference/germline_resource" }, - enabled: "${params.save_reference}", - pattern: "*vcf.gz.tbi", - mode: 'copy' - ] - } - - withName: 'TABIX_KNOWN_INDELS' { - publishDir = [ - path: { 
"${params.outdir}/reference/known_indels" }, - enabled: "${params.save_reference}", - pattern: "*vcf.gz.tbi", - mode: 'copy' - ] - } - - withName: 'TABIX_PON' { - publishDir = [ - path: { "${params.outdir}/reference/pon" }, - enabled: "${params.save_reference}", - pattern: "*vcf.gz.tbi", - mode: 'copy' - ] - } -} - -// UMI Subworkflow -process{ - withName: 'BAM2FASTQ' { - ext.args = '-T RX' - } - - withName: 'SAMBLASTER' { - ext.prefix = {"${meta.id}_unsorted_tagged"} - ext.args = '-M --addMateTags' - } - - withName: 'CALLUMICONSENSUS' { - ext.args = '-M 1 -S Coordinate' - ext.prefix = {"${meta.id}_umi-consensus"} - } -} - -if(params.umi_read_structure){ - process { - withName: "NFCORE_SAREK:SAREK:CREATE_UMI_CONSENSUS:BWA.*_MEM" { - ext.args = '-p -C -M' - ext.prefix = {"${meta.id}.umi_unsorted"} - ext.args2 = '-bS' - } - } -} - -//BAMTOFASTQ -process { - withName: 'SAMTOOLS_VIEW_MAP_MAP' { - ext.args = '-b -f1 -F12' - ext.prefix = {"${meta.id}.map_map"} - } - withName: 'SAMTOOLS_VIEW_UNMAP_UNMAP' { - ext.args = '-b -f12 -F256' - ext.prefix = {"${meta.id}.unmap_unmap"} - } - withName: 'SAMTOOLS_VIEW_UNMAP_MAP' { - ext.args = '-b -f4 -F264' - ext.prefix = {"${meta.id}.unmap_map"} - } - withName: 'SAMTOOLS_VIEW_MAP_UNMAP' { - ext.args = '-b -f8 -F260' - ext.prefix = {"${meta.id}.map_unmap"} - } - withName: 'SAMTOOLS_FASTQ_UNMAPPED'{ - ext.args2 = '-N' - ext.prefix = {"${meta.id}.unmapped"} - } - withName: 'SAMTOOLS_FASTQ_MAPPED'{ - ext.args2 = '-N' - ext.prefix = {"${meta.id}.mapped"} - } -} - -// QC_TRIM -process { - withName: 'FASTQC' { - ext.args = '--quiet' - publishDir = [ - path: { "${params.outdir}/reports/fastqc/${meta.id}" }, - mode: 'copy', - enabled: true - ] - } - withName: 'TRIMGALORE' { - ext.args = '--fastqc' - publishDir = [ - path: { "${params.outdir}/trimgalore/${meta.id}" }, - mode: 'copy', - enabled: true - ] - } - // withName: 'MULTIQC' { - // ext.args = '' - // } -} - - -// MAPPING -process { - - withName: "BWA.*MEM" { - ext.args = { meta.status 
== 1 ? '-K 100000000 -M -B 3' : '-K 100000000 -M' } - ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.findAll(/part_([0-9]+)?/).last()) : "" } - } - - withName: 'INDEX_MAPPING' { - publishDir = [ - path: { "${params.outdir}/preprocessing/${meta.id}/mapped" }, - enabled: true, - mode: 'copy', - pattern: "*{bam,bai}" - ] - } - - withName: "SEQKIT_SPLIT2" { - ext.args = { "--by-size ${params.split_fastq}" } - publishDir = [ - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: 'copy', - enabled: "${params.save_split_fastqs}", - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } -} - -// Markduplicates Spark NEEDS name-sorted reads or runtime goes through the roof -// However if it's skipped, reads need to be coordinate-sorted -// Spark can be used also for BQSR, therefore check for both -// Only name sort if Spark for Markduplicates + duplicate marking is not skipped -if (('markduplicates' in params.use_gatk_spark) && (!params.skip_markduplicates)) { - process { - withName: "BWA.*_MEM" { ext.args2 = '-n' } - } -} - -// MARKDUPLICATES -process { - withName: 'GATK4_ESTIMATELIBRARYCOMPLEXITY' { - ext.prefix = {"${meta.id}.md"} - publishDir = [ - path: { "${params.outdir}/preprocessing/${meta.id}/markduplicates" }, - enabled: true, - mode: 'copy', - pattern: "*{metrics}" - ] - } - withName: 'GATK4_MARKDUPLICATES' { - ext.args = '-REMOVE_DUPLICATES false -VALIDATION_STRINGENCY LENIENT' - ext.prefix = {"${meta.id}.md"} - //publishDir = [ - // path: { "${params.outdir}/preprocessing/${meta.id}/markduplicates" }, - // enabled: false - //] - } - withName: 'GATK4_MARKDUPLICATES_SPARK' { - ext.args = '--remove-sequencing-duplicates false -VS LENIENT' - ext.suffix = '.md' - publishDir = [ - path: { "${params.outdir}/preprocessing/${meta.id}/markduplicates" }, - enabled: true, - mode: 'copy', - pattern: "*{cram,crai}" - ] - } - withName: 'QUALIMAP_BAMQC' { - 
ext.args = '--paint-chromosome-limits --genome-gc-distr HUMAN -skip-duplicated --skip-dup-mode 0 -outformat HTML' - ext.suffix = '.mapped' - publishDir = [ - path: { "${params.outdir}/reports/qualimap/${meta.id}" }, - mode: 'copy', - enabled: true - ] - } - withName: 'SAMTOOLS_STATS' { - publishDir = [ - path: { "${params.outdir}/reports/samtools_stats/${meta.id}" }, - enabled: true, - mode: 'copy' - ] - } - withName: 'DEEPTOOLS_BAMCOVERAGE' { - publishDir = [ - path: { "${params.outdir}/reports/deeptools/${meta.id}" }, - enabled: true, - mode: 'copy' - ] - } - withName: 'SAMTOOLS_BAM_TO_CRAM|SAMTOOLS_BAM_TO_CRAM_SPARK' { - ext.suffix = '.md' - publishDir = [ - path: { "${params.outdir}/preprocessing/${meta.id}/markduplicates" }, - enabled: true, - mode: 'copy', - pattern: "*{cram,crai}" - ] - } - withName: 'INDEX_MARKDUPLICATES' { - publishDir = [ - path: { "${params.outdir}/preprocessing/${meta.id}/markduplicates" }, - enabled: true, - mode: 'copy', - pattern: "*{cram,crai}" - ] - } -} - -// PREPARE_RECALIBRATION -process { - withName: 'BASERECALIBRATOR|BASERECALIBRATOR_SPARK|GATHERBQSRREPORTS' { - publishDir = [ - path: { "${params.outdir}/preprocessing/${meta.id}/recal_table" }, - enabled: true, - mode: 'copy', - pattern: "*.table" - ] - ext.prefix = {"${meta.id}.recal"} - } -} - -// RECALIBRATE -process { - withName: 'APPLYBQSR|APPLYBQSR_SPARK' { - ext.prefix = {"${meta.id}.recal"} - } - withName: 'SAMTOOLS_MERGE_CRAM' { - ext.suffix = '.recal' - publishDir = [ - path: { "${params.outdir}/preprocessing/${meta.id}/recalibrated" }, - enabled: true, - mode: 'copy', - pattern: "*cram" - ] - } - withName: 'QUALIMAP_BAMQC_CRAM' { - ext.args = '--paint-chromosome-limits --genome-gc-distr HUMAN -skip-duplicated --skip-dup-mode 0 -outformat HTML' - ext.suffix = '.recal' - publishDir = [ - path: { "${params.outdir}/reports/qualimap/${meta.id}" }, - enabled: true, - mode: 'copy' - ] - } - withName: 'INDEX_RECALIBRATE' { - ext.suffix = 'recal' - publishDir = [ - path: { 
"${params.outdir}/preprocessing/${meta.id}/recalibrated" }, - enabled: true, - pattern: "*{recal.cram,recal.cram.crai}" - ] - } - withName: 'SAMTOOLS_STATS' { - publishDir = [ - path: { "${params.outdir}/reports/samtools_stats/${meta.id}" }, - enabled: true, - mode: 'copy' - ] - } -} - -// GERMLINE & TUMOR ONLY Variant_Calling -process{ - withName: 'CONCAT_VCF_DEEPVARIANT' { - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.sample}/deepvariant" }, - enabled: "!${params.no_intervals}", - mode: 'copy' - ] - ext.args = { params.no_intervals ? "-n" : "" } - ext.prefix = {"${meta.sample}"} - } - withName: 'CONCAT_GVCF_DEEPVARIANT' { - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.sample}/deepvariant" }, - enabled: "!${params.no_intervals}", - mode: 'copy' - ] - ext.args = { params.no_intervals ? "-n" : "" } - ext.prefix = {"${meta.sample}.g"} - } - - withName: 'CONCAT_VCF_FREEBAYES' { - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.sample}/freebayes" }, - enabled: "!${params.no_intervals}", - mode: 'copy' - ] - ext.args = { params.no_intervals ? "-n" : "" } - ext.prefix = {"${meta.sample}"} - - } - withName: 'CONCAT_VCF_HAPLOTYPECALLER' { - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.sample}/haplotypecaller" }, - enabled: "!${params.no_intervals}", - mode: 'copy' - ] - ext.args = { params.no_intervals ? "-n" : "" } - ext.prefix = {"${meta.sample}.g"} - } - withName: 'CONCAT_VCF_MANTA_.*' { - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.sample}/manta" }, - enabled: "!${params.no_intervals}", - mode: 'copy' - ] - ext.args = { params.no_intervals ? "-n" : "" } - ext.prefix = {"${meta.sample}"} - } - withName: 'CONCAT_VCF_MUTECT2' { - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.sample}/mutect2" }, - enabled: "!${params.no_intervals}", - mode: 'copy' - ] - ext.args = { params.no_intervals ? 
"-n" : "" } - ext.prefix = {"${meta.sample}"} - } - withName: 'CONCAT_VCF_STRELKA' { - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.sample}/strelka" }, - enabled: "!${params.no_intervals}", - mode: 'copy' - ] - ext.args = { params.no_intervals ? "-n" : "" } - ext.prefix = {"${meta.sample}"} - } - withName: 'DEEPVARIANT' { - ext.args = { params.wes ? "--model_type WES" : "--model_type WGS" } - } - withName: 'FREEBAYES' { - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.sample}/manta" }, - enabled: "${params.no_intervals}", - mode: 'copy' - ] - ext.args = '--min-alternate-fraction 0.1 --min-mapping-quality 1' - } - //withName: 'GATK4_CALCULATECONTAMINATION'{ - // publishDir = [ enabled: false ] - // ext.args = '' - //} - //withName: 'GATK4_FILTERMUTECTCALLS'{ - // publishDir = [ enabled: false ] - // ext.args = '' - //} - //withName: 'GATK4_GETPILEUPSUMMARIES'{ - // publishDir = [ enabled: false ] - // ext.args = '' - //} - withName: 'GATK4_MUTECT2'{ - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.sample}/mutect2" }, - enabled: "${params.no_intervals}", - mode: 'copy' - ] - } - //withName: 'GENOMICSDBIMPORT' { - // - //} - withName: 'HAPLOTYPECALLER' { - ext.args = '-ERC GVCF' - } - withName: 'MANTA_GERMLINE|MANTA_TUMORONLY|MANTA_SOMATIC' { - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.sample}/manta" }, - enabled: "${params.no_intervals}", - mode: 'copy' - ] - ext.args = { params.wes ? "--exome" : "" } - } - withName: 'STRELKA_GERMLINE|STRELKA_TUMORONLY|STRELKA_SOMATIC|STRELKA_BP' { - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.sample}/strelka" }, - enabled: "${params.no_intervals}", - mode: 'copy' - ] - ext.args = { params.wes ? 
"--exome" : "" } - } - withName : 'TABIX_DEEPVARIANT_VCF|TABIX_DEEPVARIANT_GVCF' { - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.sample}/deepvariant" }, - enabled: true, - ] - ext.prefix = {"${meta.sample}"} - } - withName : 'TABIX_FREEBAYES' { - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.sample}/freebayes" }, - enabled: true, - mode: 'copy' - ] - ext.prefix = {"${meta.sample}"} - } - withName : 'TABIX_HAPLOTYPECALLER' { - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.sample}/haplotypecaller" }, - enabled: true, - mode: 'copy' - ] - ext.prefix = {"${meta.sample}"} - } - //withName: 'TIDDIT_SV' { - // publishDir = [ - // path: { "${params.outdir}/variant_calling/${meta.sample}/tiddit" }, - // enabled: true - // ] - //} - -} - -// TUMOR_VARIANT_CALLING -process{ - - withName: 'MERGEMUTECTSTATS' { - ext.prefix = { "${meta.sample}.vcf.gz" } - } - withName: 'GATHERPILEUPSUMMARIES' { - ext.prefix = { "${meta.sample}.table" } - } -} - -// PAIR_VARIANT_CALLING - -process{ - - withName: 'NFCORE_SAREK:SAREK:PAIR_VARIANT_CALLING:GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING:MUTECT2'{ - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" }, - enabled: "${params.no_intervals}", - mode: 'copy' - ] - } - withName: 'NFCORE_SAREK:SAREK:PAIR_VARIANT_CALLING:GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING:CONCAT_VCF_MUTECT2' { - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" }, - enabled: "!${params.no_intervals}", - mode: 'copy' - ] - ext.args = { params.no_intervals ? 
"-n" : "" } - ext.prefix = {"${meta.id}"} - } - withName: 'GATK4_MERGEMUTECTSTATS' { - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" }, - enabled: true, - mode: 'copy' - ] - } - withName: 'GATK4_FILTERMUTECTCALLS'{ - publishDir = [ - path: { "${params.outdir}/variant_calling/${meta.id}/mutect2" }, - enabled: true, - mode: 'copy' - ] - ext.prefix = {"${meta.id}.filtered."} - } -} - -// withName: manta_somatic { -// publishDir = [ -// path: { "${params.outdir}/variant_calling' -// publish_files = ['vcf.gz':'manta', 'vcf.gz.tbi':'manta'] -// } -// withName: msisensorpro_msi { -// publishDir = [ -// path: { "${params.outdir}/variant_calling' -// publish_files = ['list':'msisensorpro'] -// } -// withName: strelka_somatic { -// publishDir = [ -// path: { "${params.outdir}/variant_calling' -// publish_files = ['vcf.gz':'strelka', 'vcf.gz.tbi':'strelka'] -// } -// withName: strelka_somatic_bp { -// publishDir = [ -// path: { "${params.outdir}/variant_calling' -// publish_files = ['vcf.gz':'strelka', 'vcf.gz.tbi':'strelka'] -// } -// withName: mutect2_somatic { -// publishDir = [ -// path: { "${params.outdir}/variant_calling' -// publish_files = ['vcf.gz':'mutect2', 'vcf.gz.tbi':'mutect2'] -// } - -// ANNOTATE -process { - - withName: 'ENSEMBLVEP' { - ext.args = '--everything --filter_common --per_gene --total_length --offline' - container = { "nfcore/vep:104.3.${params.genome}" } - } - - withName: 'SNPEFF' { - ext.args = '-nodownload -canon -v' - container = { "nfcore/snpeff:5.0.${params.genome}" } - } - - withName: 'ANNOTATION_BGZIPTABIX' { - publishDir = [ - path: { "${params.outdir}/annotation/${meta.id}" }, - enabled: true, - mode: 'copy', - pattern: "*{gz,gz.tbi}" - ] - } - -} - -if ((params.tools) && (params.tools.contains('snpeff') || params.tools.contains('merge'))) { - process { - withName: 'NFCORE_SAREK:SAREK:ANNOTATE:ANNOTATION_SNPEFF:ANNOTATION_BGZIPTABIX' { - ext.prefix = {"${meta.id}_snpEff.ann.vcf"} - } - } -} - -if 
((params.tools) && (params.tools.contains('vep'))) { - process { - withName: 'NFCORE_SAREK:SAREK:ANNOTATE:ANNOTATION_ENSEMBLVEP:ANNOTATION_BGZIPTABIX' { - ext.prefix = {"${meta.id}_VEP.ann.vcf"} - } - } -} - -if ((params.tools) && (params.tools.contains('merge'))) { - process { - withName: 'NFCORE_SAREK:SAREK:ANNOTATE:MERGE_ANNOTATE:ANNOTATION_BGZIPTABIX' { - ext.prefix = {"${meta.id}_snpEff_VEP.ann.vcf"} - } - } - -} diff --git a/conf/modules/aligner.config b/conf/modules/aligner.config new file mode 100644 index 0000000000..2af1fa8047 --- /dev/null +++ b/conf/modules/aligner.config @@ -0,0 +1,89 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MAPPING + +process { + + withName: 'BWAMEM1_MEM' { + ext.when = { params.aligner == 'bwa-mem' } + } + + withName: 'BWAMEM2_MEM' { + ext.when = { params.aligner == 'bwa-mem2' } + } + + withName: 'DRAGMAP_ALIGN' { + ext.args = { "--RGSM ${meta.patient}_${meta.sample} --RGID ${meta.read_group}" } + ext.when = { params.aligner == 'dragmap' } + } + + withName: 'SENTIEON_BWAMEM' { + ext.prefix = { params.split_fastq > 1 ? 
"${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]).concat('.bam') : "${meta.id}.sorted.bam" } + ext.when = { params.aligner == 'sentieon-bwamem' } + } + + withName: 'BWAMEM.*_MEM|DRAGMAP_ALIGN|SENTIEON_BWAMEM' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*bam", + // Only save if save_output_as_bam AND + // (save_mapped OR no_markduplicates OR sentieon_dedup) AND + // only a single BAM file per sample + saveAs: { + if (params.save_output_as_bam && + ( + params.save_mapped || + (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) && + !(params.tools && params.tools.split(',').contains('sentieon_dedup')) + ) && (meta.size * meta.num_lanes == 1) + ) { "mapped/${meta.id}/${it}" } + else { null } + } + ] + } + + withName: 'BWAMEM.*_MEM|DRAGMAP_ALIGN' { + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "${meta.id}.sorted" } + // Markduplicates Spark NEEDS name-sorted reads or runtime goes through the roof + // However if it's skipped, reads need to be coordinate-sorted + // Only name sort if Spark for Markduplicates + duplicate marking is not skipped + // Currently SENTIEON_BWAMEM only supports coordinate sorting the reads. + ext.args2 = { params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates') && (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('markduplicates'))) ? '-n' : '' } + } + + withName: 'BWAMEM.*_MEM|SENTIEON_BWAMEM' { + // Using -B 3 for tumor samples + ext.args = { meta.status == 1 ? "-K 100000000 -Y -B 3 -R ${meta.read_group}" : "-K 100000000 -Y -R ${meta.read_group}" } + } + + withName: 'FGBIO_COPYUMIFROMREADNAME' { + ext.prefix = { params.split_fastq > 1 ? 
bam.name.replaceFirst(/\.bam$/, '_umi_extracted') : "${meta.id}_umi_extracted" } + } + + withName: 'MERGE_BAM|INDEX_MERGE_BAM' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*{bam,bai}", + // Only save if (save_output_as_bam AND (no_markduplicates OR save_mapped )) + saveAs: { (params.save_output_as_bam && (params.save_mapped || params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) ? "mapped/${meta.id}/${it}" : null } + ] + } + + withName: 'MERGE_BAM' { + ext.prefix = { "${meta.id}.sorted" } + } + +} diff --git a/conf/modules/aligner_parabricks.config b/conf/modules/aligner_parabricks.config new file mode 100644 index 0000000000..e06f570105 --- /dev/null +++ b/conf/modules/aligner_parabricks.config @@ -0,0 +1,63 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MAPPING WITH PARABRICKS + +process { + withName: 'PARABRICKS_FQ2BAM' { + ext.args = { [ + // Using specific read group tags for mutect compability + "--read-group-id-prefix ${meta.sample_lane_id}", + "--read-group-sm ${meta.patient}_${meta.sample}", + "--read-group-lb ${meta.sample}", + "--read-group-pl ${params.seq_platform}", + // Using -B 3 for tumor samples + meta.status == 1 ? 
"--bwa-options='-K 100000000 -Y -B 3'" : "--bwa-options='-K 100000000 -Y'", + ].join(' ').trim() } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/parabricks/${meta.id}/" }, + pattern: "*{cram,crai}", + saveAs: { params.save_mapped ? it : null } + ] + } + + withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_PARABRICKS:CRAM_MERGE_INDEX_SAMTOOLS:MERGE_CRAM' { + ext.when = { meta.num_lanes > 1 } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/parabricks/${meta.id}/" }, + pattern: "*cram", + saveAs: { params.save_mapped ? it : null } + ] + } + + withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_PARABRICKS:CRAM_MERGE_INDEX_SAMTOOLS:INDEX_CRAM' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/parabricks/${meta.id}/" }, + pattern: "*{cram,crai}", + saveAs: { params.save_mapped ? it : null } + ] + } + + withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_PARABRICKS:CRAM_TO_BAM' { + //ext.when = { params.save_output_as_bam } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/parabricks/${meta.id}/" }, + pattern: "*{bam,bam.bai}", + saveAs: { params.save_output_as_bam ? it : null } + ] + } +} diff --git a/conf/modules/alignment_to_fastq.config b/conf/modules/alignment_to_fastq.config new file mode 100644 index 0000000000..32878e5342 --- /dev/null +++ b/conf/modules/alignment_to_fastq.config @@ -0,0 +1,85 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). 
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// BAM TO FASTQ + +process { + + withName: 'COLLATE_FASTQ_MAP' { + ext.args2 = { '-N' } + ext.prefix = { "${meta.id}.mapped" } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'COLLATE_FASTQ_UNMAP' { + ext.args2 = { '-N' } + ext.prefix = { "${meta.id}.unmapped" } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_VIEW_MAP_MAP' { + ext.args = { '-b -f1 -F12' } + ext.prefix = { "${meta.id}.map_map" } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_VIEW_MAP_UNMAP' { + ext.args = { '-b -f8 -F260' } + ext.prefix = { "${meta.id}.map_unmap" } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_VIEW_UNMAP_MAP' { + ext.args = { '-b -f4 -F264' } + ext.prefix = { "${meta.id}.unmap_map" } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_VIEW_UNMAP_UNMAP' { + ext.args = { '-b -f12 -F256' } + ext.prefix = { "${meta.id}.unmap_unmap" } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_MERGE_UNMAP' { + ext.prefix = { "${meta.id}.merged_unmap" } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + withName: 'CAT_FASTQ' { + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } +} diff --git a/conf/modules/annotate.config b/conf/modules/annotate.config new file mode 100644 index 0000000000..28e07d832a --- /dev/null +++ 
b/conf/modules/annotate.config @@ -0,0 +1,129 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// ANNOTATE + +process { + + // SNPEFF + withName: 'SNPEFF_SNPEFF' { + ext.args = { '-nodownload -canon -v' } + ext.prefix = { meta.variantcaller == 'consensus' ? "${meta.id}.consensus_snpEff" : vcf.baseName - '.vcf' + '_snpEff' } + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/snpeff/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{csv,html,genes.txt}", + saveAs: { params.tools.split(',').contains('snpeff') ? it : null } + ] + ] + } + + // VEP + withName: 'ENSEMBLVEP_VEP' { + ext.args = { ["--stats_file ", + meta.variantcaller == 'consensus' ? "${meta.id}.consensus_VEP.ann.summary.html" : vcf.baseName - '.vcf' + '_VEP.ann.summary.html', + (params.vep_condel && params.condel_config) ? "--plugin Condel,${params.condel_config},b" : '', + (params.vep_dbnsfp && params.dbnsfp && !params.dbnsfp_consequence) ? "--plugin dbNSFP,${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_dbnsfp && params.dbnsfp && params.dbnsfp_consequence) ? "--plugin dbNSFP,'consequence=${params.dbnsfp_consequence}',${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_loftee) ? 
"--plugin LoF,loftee_path:/usr/local/share/ensembl-vep-${params.vep_version}" : '', + (params.vep_mastermind && params.mastermind_file) ? "--plugin Mastermind,file=${params.mastermind_file},mutations=${params.mastermind_mutations ? 1 : 0},var_iden=${params.mastermind_var_iden ? 1 : 0},url=${params.mastermind_url ? 1 : 0}" : '', + (params.vep_phenotypes) ? "--plugin Phenotypes${params.phenotypes_file ? ",file=${params.phenotypes_file.split("/")[-1]}" : ''}${params.phenotypes_include_types ? ",include_types=${params.phenotypes_include_types}" : ''}" : '', + (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? "--plugin SpliceAI,snv=${params.spliceai_snv.split("/")[-1]},indel=${params.spliceai_indel.split("/")[-1]}" : '', + (params.vep_spliceregion) ? '--plugin SpliceRegion' : '', + (params.vep_out_format) ? "--${params.vep_out_format}" : '--vcf', + (params.vep_custom_args) ?: '' + ].join(' ').trim() } + // If just VEP: _VEP.ann.vcf + ext.prefix = { meta.variantcaller == 'consensus' ? "${meta.id}.consensus_VEP.ann" : vcf.baseName - '.vcf' + '_VEP.ann' } + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/EnsemblVEP/${meta.variantcaller}/${meta.id}/" }, + pattern: "*html" + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{gz,tbi}" + ] + ] + } + + // BCFTOOLS ANNOTATE + withName: 'NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:BCFTOOLS_ANNOTATE' { + ext.args = { '--output-type z --write-index=tbi' } + ext.prefix = { meta.variantcaller == 'consensus' ? 
"${meta.id}.consensus_BCF.ann" : input.baseName - '.vcf' + '_BCF.ann' } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{gz,gz.tbi}" + ] + } + + // SNPSIFT ANNOTATE + withName: 'NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:SNPSIFT_ANNMEM' { + ext.prefix = { vcf.baseName - '.ann' - '.vcf' + '_snpSift.ann' } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{gz,gz.tbi}" + ] + } + + withName: 'NFCORE_SAREK:PREPARE_SNPSIFT_DATABASES:SNPSIFT_ANNMEMCREATE' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/snpsift" }, + saveAs: { params.save_reference ? it : null } + ] + } + + // SNPEFF THEN VEP + withName: 'NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:VCF_ANNOTATE_MERGE' { + ext.args = { [ + "--stats_file ${meta.variantcaller == 'consensus' ? "${meta.id}.consensus_snpEff_VEP.ann" : vcf.baseName - '.ann.vcf' + '_VEP.ann'}.summary.html", + (params.vep_condel && params.condel_config) ? "--plugin Condel,${params.condel_config},b" : '', + (params.vep_dbnsfp && params.dbnsfp && !params.dbnsfp_consequence) ? "--plugin dbNSFP,${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_dbnsfp && params.dbnsfp && params.dbnsfp_consequence) ? "--plugin dbNSFP,'consequence=${params.dbnsfp_consequence}',${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_loftee) ? "--plugin LoF,loftee_path:/usr/local/share/ensembl-vep-${params.vep_version}" : '', + (params.vep_mastermind && params.mastermind_file) ? "--plugin Mastermind,file=${params.mastermind_file},mutations=${params.mastermind_mutations ? 1 : 0},var_iden=${params.mastermind_var_iden ? 1 : 0},url=${params.mastermind_url ? 1 : 0}" : '', + (params.vep_phenotypes) ? "--plugin Phenotypes${params.phenotypes_file ? 
",file=${params.phenotypes_file.split("/")[-1]}" : ''}${params.phenotypes_include_types ? ",include_types=${params.phenotypes_include_types}" : ''}" : '', + (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? "--plugin SpliceAI,snv=${params.spliceai_snv.split("/")[-1]},indel=${params.spliceai_indel.split("/")[-1]}" : '', + (params.vep_spliceregion) ? '--plugin SpliceRegion' : '', + (params.vep_out_format) ? "--${params.vep_out_format}" : '--vcf', + (params.vep_custom_args) ?: '' + ].join(' ').trim() } + // If merge: Output file will have format *_snpEff_VEP.ann.vcf, *_snpEff_VEP.ann.json or *_snpEff_VEP.ann.tab + ext.prefix = { meta.variantcaller == 'consensus' ? "${meta.id}.consensus_snpEff_VEP.ann" : vcf.baseName - '.ann.vcf' + '_VEP.ann' } + } + + // ALL ANNOTATION TOOLS + withName: 'NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:.*:(TABIX_BGZIPTABIX|TABIX_TABIX)' { + ext.prefix = { input.name - '.vcf' } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{gz.tbi}" + ] + } + + withName: 'NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:VCF_ANNOTATE_SNPEFF:TABIX_BGZIPTABIX' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{gz,gz.tbi}", + saveAs: { params.tools.split(',').contains('snpeff') ? it : null } + ] + } +} diff --git a/conf/modules/ascat.config b/conf/modules/ascat.config new file mode 100644 index 0000000000..4df9824274 --- /dev/null +++ b/conf/modules/ascat.config @@ -0,0 +1,37 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. 
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// ASCAT + +process { + + withName: 'ASCAT' { + ext.args = { [ + "gender": meta.sex, + "genomeVersion": params.ascat_genome, + "purity": params.ascat_purity, + "ploidy": params.ascat_ploidy, + "minCounts": params.ascat_min_counts, + "chrom_names": meta.sex == 'XX' ? "c(1:22, 'X')" : "c(1:22, 'X', 'Y')", // for faster testing use "c('21', '22')" + "min_base_qual": params.ascat_min_base_qual, + "min_map_qual": params.ascat_min_map_qual + ] + } + ext.when = { params.tools && params.tools.split(',').contains('ascat') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/ascat/${meta.id}/" }, + pattern: "*{png,cnvs.txt,metrics.txt,purityploidy.txt,segments.txt,LogR.txt,BAF.txt}" + ] + } +} diff --git a/conf/modules/cnvkit.config b/conf/modules/cnvkit.config new file mode 100644 index 0000000000..2bf455faa7 --- /dev/null +++ b/conf/modules/cnvkit.config @@ -0,0 +1,67 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
+---------------------------------------------------------------------------------------- +*/ + +// CNVKIT + +process { + + withName: 'CNVKIT_BATCH' { + ext.args = { params.wes ? "--method hybrid --diagram --scatter" : "--method wgs --diagram --scatter" } + ext.when = { params.tools && params.tools.split(',').contains('cnvkit') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/cnvkit/${meta.id}/" }, + pattern: "*{bed,cnn,cnr,cns,pdf,png}" + ] + } + + withName: '.*:BAM_VARIANT_CALLING_CNVKIT:CNVKIT_CALL' { + ext.when = { params.tools && params.tools.split(',').contains('cnvkit') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/cnvkit/${meta.id}/" }, + pattern: "*{cns}" + ] + } + withName: '.*:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_CNVKIT:CNVKIT_CALL' { + ext.prefix = { "${cns.baseName}.germline.call" } + ext.args = "--filter ci" + } + withName: '.*:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_CNVKIT:CNVKIT_CALL' { + ext.prefix = { "${cns.baseName}.somatic.call" } + } + withName: '.*:BAM_VARIANT_CALLING_TUMOR_ONLY_ALL:BAM_VARIANT_CALLING_CNVKIT:CNVKIT_CALL' { + ext.prefix = { "${cns.baseName}.tumor_only.call" } + } + + withName: 'CNVKIT_EXPORT' { + ext.args = "vcf" + ext.prefix = { "${meta.id}.cnvcall" } + ext.when = { params.tools && params.tools.split(',').contains('cnvkit') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/cnvkit/${meta.id}/" }, + pattern: "*{vcf}" + ] + } + + withName: 'CNVKIT_GENEMETRICS' { + ext.prefix = { "${cnr.baseName}.genemetrics" } + ext.when = { params.tools && params.tools.split(',').contains('cnvkit') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/cnvkit/${meta.id}/" }, + pattern: "*{tsv}" + ] + } +} diff --git a/conf/modules/contamination.config b/conf/modules/contamination.config new file mode 100644 index 0000000000..0cb9406519 
--- /dev/null +++ b/conf/modules/contamination.config @@ -0,0 +1,21 @@ +process { + // Contaminant removal options + withName: 'BBMAP_BBSPLIT' { + ext.args = 'build=1 ambiguous2=best maxindel=150000' + ext.prefix = { params.split_fastq && reads ? "${reads[0].getName().tokenize('.')[0]}.${meta.id}" : "${meta.id}" } + + publishDir = [ + [ + path: { "${params.outdir}/preprocessing/bbsplit/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: '*.txt' + ], + [ + path: { params.save_bbsplit_reads ? "${params.outdir}/preprocessing/bbsplit/${meta.id}" : params.outdir }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + saveAs: { params.save_bbsplit_reads ? it : null } + ] + ] + } +} diff --git a/conf/modules/controlfreec.config b/conf/modules/controlfreec.config new file mode 100644 index 0000000000..b6fa043d0e --- /dev/null +++ b/conf/modules/controlfreec.config @@ -0,0 +1,130 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
+---------------------------------------------------------------------------------------- +*/ + +// CONTROLFREEC + +process { + + withName: 'ASSESS_SIGNIFICANCE' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/controlfreec/${meta.id}/" }, + pattern: "*{.p.value.txt}" + ] + } + + withName: 'FREEC_.*' { + ext.when = { params.tools && params.tools.split(',').contains('controlfreec') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/controlfreec/${meta.id}/" }, + pattern: "*{BedGraph,cpn,txt,_CNVs}" + ] + } + + withName: 'FREEC2BED' { + ext.args = { "${params.cf_ploidy}" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/controlfreec/${meta.id}/" }, + pattern: "*bed" + ] + } + + withName: 'FREEC2CIRCOS' { + ext.args = { "${params.cf_ploidy}" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/controlfreec/${meta.id}/" }, + pattern: "*circos.txt" + ] + } + + withName: 'MAKEGRAPH2' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/controlfreec/${meta.id}/" }, + pattern: "*png" + ] + } + +// TUMOR_ONLY_VARIANT_CALLING + withName: 'FREEC_TUMORONLY' { + ext.args = { [ + "sample":[ + inputformat: 'pileup', + mateorientation: 'FR' + ], + "general" :[ + bedgraphoutput: "TRUE", + breakpointthreshold: params.wes ? "1.2" : "0.8", //Values taken from Freec example configs + breakpointtype: params.wes ? "4" : "2", // Values taken from Freec example configs + coefficientofvariation: params.cf_coeff, + contamination: params.cf_contamination ?: "", + contaminationadjustment: params.cf_contamination_adjustment ? "TRUE" : "", + forcegccontentnormalization: params.wes ? "1" : "0", + minimalsubclonepresence: params.wes ? "30" : "20", + noisydata: params.wes ? "TRUE" : "FALSE", + ploidy: params.cf_ploidy, + printNA: params.wes ? 
"FALSE" : "TRUE", + readcountthreshold: params.wes ? "50" : "10", + sex: meta.sex, + //uniquematch: not set + window: params.cf_window ?: "" + ], + "BAF":[ + minimalcoverageperposition: params.cf_mincov ?: "", + minimalqualityperposition: params.cf_minqual ?: "", + //"shiftinquality": (optional)not set + ] + ] + } + } + +// PAIR_VARIANT_CALLING + withName: 'FREEC_SOMATIC' { + ext.args = { [ + "sample":[ + inputformat: 'pileup', + mateorientation: 'FR' + ], + "control":[ + inputformat: "pileup", + mateorientation: "FR" + ], + "general" :[ + bedgraphoutput: "TRUE", + breakpointthreshold: params.wes ? "1.2" : "0.8", //Values taken from Freec example configs + breakpointtype: params.wes ? "4" : "2", // Values taken from Freec example configs + coefficientofvariation: params.cf_coeff, + contamination: params.cf_contamination ?: "", + contaminationadjustment: params.cf_contamination_adjustment ? "TRUE" : "", + forcegccontentnormalization: params.wes ? "1" : "0", + minimalsubclonepresence: params.wes ? "30" : "20", + noisydata: params.wes ? "TRUE" : "FALSE", + ploidy: params.cf_ploidy, + printNA: params.wes ? "FALSE" : "TRUE", + readcountthreshold: params.wes ? 
"50" : "10", + sex: meta.sex, + //uniquematch: not set + window: params.cf_window ?: "" + ], + "BAF":[ + minimalcoverageperposition: params.cf_mincov ?: "", + minimalqualityperposition: params.cf_minqual ?: "", + //"shiftinquality": (optional)not set + ] + ] + } + } +} diff --git a/conf/modules/deepvariant.config b/conf/modules/deepvariant.config new file mode 100644 index 0000000000..6d97477550 --- /dev/null +++ b/conf/modules/deepvariant.config @@ -0,0 +1,46 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// DEEPVARIANT + +process { + + withName: 'DEEPVARIANT_RUNDEEPVARIANT' { + ext.args = {[ + params.wes ? "--model_type=WES" : "--model_type=WGS", + meta.sample ? "--sample_name ${meta.sample}": "", + ].join(" ").trim()} + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.deepvariant" : "${meta.id}.deepvariant.${intervals.baseName}" } + ext.when = { params.tools && params.tools.split(',').contains('deepvariant') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? 
null : "deepvariant/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_DEEPVARIANT_.*' { + ext.prefix = { "${meta.id}.deepvariant" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/deepvariant/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'MERGE_DEEPVARIANT_GVCF' { + ext.prefix = { "${meta.id}.deepvariant.g" } + } + +} diff --git a/conf/modules/download_cache.config b/conf/modules/download_cache.config new file mode 100644 index 0000000000..5b36ab4cc5 --- /dev/null +++ b/conf/modules/download_cache.config @@ -0,0 +1,36 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// PREPARE_CACHE + +process { + + // SNPEFF + withName: 'SNPEFF_DOWNLOAD' { + ext.when = { params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { params.outdir_cache ? 
"${params.outdir_cache}/": "${params.outdir}/cache/" } + ] + } + + // VEP + withName: 'ENSEMBLVEP_DOWNLOAD' { + ext.when = { params.tools && (params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge')) } + ext.args = { '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE' } + publishDir = [ + mode: params.publish_dir_mode, + path: { params.outdir_cache ? "${params.outdir_cache}/": "${params.outdir}/cache/" } + ] + } +} diff --git a/conf/modules/freebayes.config b/conf/modules/freebayes.config new file mode 100644 index 0000000000..1fcfc7de9b --- /dev/null +++ b/conf/modules/freebayes.config @@ -0,0 +1,84 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// FREEBAYES + +process { + + withName: 'MERGE_FREEBAYES' { + ext.prefix = { "${meta.id}.freebayes" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/freebayes/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'FREEBAYES' { + ext.args = { '--min-alternate-fraction 0.1 --min-mapping-quality 1' } + //To make sure no naming conflicts ensure with module BCFTOOLS_SORT & the naming being correct in the output folder + ext.prefix = { meta.num_intervals <= 1 ? 
"${meta.id}" : "${meta.id}.${target_bed.baseName}" } + ext.when = { params.tools && params.tools.split(',').contains('freebayes') } + publishDir = [ + enabled: false + ] + } + + withName: 'BCFTOOLS_SORT' { + ext.prefix = { meta.num_intervals <= 1 ? meta.id + ".freebayes" : vcf.name - ".vcf" + ".sort" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*vcf.gz", + saveAs: { meta.num_intervals > 1 ? null : "freebayes/${meta.id}/${it}" } + ] + } + + withName : 'TABIX_VC_FREEBAYES' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/freebayes/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName : 'VCFLIB_VCFFILTER' { + ext.prefix = { "${meta.id}.freebayes.filtered" } + ext.args = { "--info-filter 'QUAL > ${params.freebayes_filter}'" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/freebayes/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'TABIX_VC_FREEBAYES_FILT' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/freebayes/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + // PAIR_VARIANT_CALLING + withName: '.*:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_FREEBAYES:FREEBAYES' { + ext.args = { "--pooled-continuous \ + --pooled-discrete \ + --genotype-qualities \ + --report-genotype-likelihood-max \ + --allele-balance-priors-off \ + --min-alternate-fraction 0.03 \ + --min-repeat-entropy 1 \ + --min-alternate-count 2 " } + } +} diff --git a/conf/modules/haplotypecaller.config b/conf/modules/haplotypecaller.config new file mode 100644 index 0000000000..7d652e8082 --- /dev/null +++ b/conf/modules/haplotypecaller.config @@ -0,0 +1,68 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// HAPLOTYPECALLER + +process { + + withName: 'GATK4_HAPLOTYPECALLER' { + ext.args = { [ + params.joint_germline ? "-ERC GVCF" : "", + params.gatk_pcr_indel_model ? "--pcr-indel-model ${params.gatk_pcr_indel_model}" : "", + ].join(" ").trim() } + ext.prefix = { meta.num_intervals <= 1 ? ( params.joint_germline ? "${meta.id}.haplotypecaller.g" : "${meta.id}.haplotypecaller" ) : ( params.joint_germline ? 
"${meta.id}.haplotypecaller.${intervals.baseName}.g" :"${meta.id}.haplotypecaller.${intervals.baseName}" ) } + ext.when = { params.tools && params.tools.split(',').contains('haplotypecaller') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/"}, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? null : "haplotypecaller/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_HAPLOTYPECALLER' { + ext.prefix = { params.joint_germline ? "${meta.id}.haplotypecaller.g" : "${meta.id}.haplotypecaller" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/haplotypecaller/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'CNNSCOREVARIANTS' { + publishDir = [ + // Otherwise it gets published + enabled: false + ] + } + + withName: '.*:VCF_VARIANT_FILTERING_GATK:FILTERVARIANTTRANCHES' { + ext.args = { "--info-key CNN_1D" } + ext.prefix = { "${meta.id}.haplotypecaller" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/haplotypecaller/${meta.id}/"}, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_HAPLOTYPECALLER:BAM_MERGE_INDEX_SAMTOOLS:(MERGE_BAM|INDEX_MERGE_BAM)' { + ext.prefix = { "${meta.id}.realigned" } + publishDir = [ + enabled: true, + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/haplotypecaller/${meta.id}/" }, + pattern: "*{bam,bai}" + ] + } +} diff --git a/conf/modules/indexcov.config b/conf/modules/indexcov.config new file mode 100644 index 0000000000..e7e4a28239 --- /dev/null +++ b/conf/modules/indexcov.config @@ -0,0 +1,22 @@ + +// INDEXCOV + +process { + + withName: 'SAMTOOLS_REINDEX_BAM' { + ext.args = { ' -F 3844 -q 30 ' } // high mapq , primary read paired properly mapped + publishDir = [ + enabled: false + ] + + } + + withName: 'GOLEFT_INDEXCOV' 
{ + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/indexcov/" } + ] + + } + +} diff --git a/conf/modules/joint_germline.config b/conf/modules/joint_germline.config new file mode 100644 index 0000000000..6f754f615f --- /dev/null +++ b/conf/modules/joint_germline.config @@ -0,0 +1,84 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// JOINT_GERMLINE + +process { + + withName: 'GATK4_GENOMICSDBIMPORT' { + ext.args = { params.wes ? 
+ '--merge-input-intervals --genomicsdb-shared-posixfs-optimizations true --bypass-feature-reader' : + '--genomicsdb-shared-posixfs-optimizations true --bypass-feature-reader' } + ext.prefix = { "${meta.intervals_name}.joint" } + publishDir = [ + enabled: false + ] + } + + withName: 'GATK4_GENOTYPEGVCFS' { + ext.prefix = { meta.intervals_name } + publishDir = [ + enabled: false + ] + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_GATK:BCFTOOLS_SORT' { + ext.prefix = { vcf.baseName - ".vcf" + ".sort" } + publishDir = [ + enabled: false + ] + } + + withName: 'MERGE_GENOTYPEGVCFS' { + ext.prefix = { 'joint_germline' } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/haplotypecaller/joint_variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'VARIANTRECALIBRATOR_INDEL' { + ext.args = { '-an QD -an MQRankSum -an ReadPosRankSum -an FS -an SOR -an DP -mode INDEL' } + ext.prefix = { "${meta.id}_INDEL" } + publishDir = [ + enabled: false + ] + } + + withName: 'VARIANTRECALIBRATOR_SNP' { + ext.args = { '-an QD -an MQ -an MQRankSum -an ReadPosRankSum -an FS -an SOR -mode SNP' } + ext.prefix = { "${meta.id}_SNP" } + publishDir = [ + enabled: false + ] + } + + withName: 'GATK4_APPLYVQSR_SNP' { + ext.args = { '--truth-sensitivity-filter-level 99.9 -mode SNP' } + ext.prefix = { "${meta.id}_SNP" } + publishDir = [ + enabled: false + ] + } + + withName: 'GATK4_APPLYVQSR_INDEL' { + ext.args = { '--truth-sensitivity-filter-level 99.9 -mode INDEL' } + ext.prefix = { 'joint_germline_recalibrated' } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/haplotypecaller/joint_variant_calling/"}, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } +} diff --git a/conf/modules/lofreq.config b/conf/modules/lofreq.config new file mode 100644 index 0000000000..5517fa0157 --- /dev/null +++ b/conf/modules/lofreq.config @@ -0,0 +1,43 @@ +/* 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +//LOFREQ + +process { + + withName: "LOFREQ_CALLPARALLEL" { + ext.args = { "--call-indels" } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.lofreq" : "${meta.id}.lofreq.${intervals.baseName}" } + ext.when = { params.tools && params.tools.split(',').contains('lofreq') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? 
null : "lofreq/${meta.id}/${it}" } + ] + } + + withName:'VCFTOOLS_TSTV_COUNT'{ + errorStrategy = 'ignore' + } + + withName: 'MERGE_LOFREQ.*' { + ext.prefix = { "${meta.id}.lofreq" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/lofreq/${meta.id}" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + +} diff --git a/conf/modules/manta.config b/conf/modules/manta.config new file mode 100644 index 0000000000..0e1aa7b531 --- /dev/null +++ b/conf/modules/manta.config @@ -0,0 +1,26 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MANTA + +process { + withName: 'MANTA_GERMLINE|MANTA_TUMORONLY|MANTA_SOMATIC' { + ext.args = { params.wes ? 
"--exome" : '' } + ext.prefix = { "${meta.id}.manta" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/manta/${meta.id}" }, + pattern: "*{diploid_sv,tumor_sv,somatic_sv}.{vcf.gz,vcf.gz.tbi}" + ] + } +} diff --git a/conf/modules/markduplicates.config b/conf/modules/markduplicates.config new file mode 100644 index 0000000000..ea017c0156 --- /dev/null +++ b/conf/modules/markduplicates.config @@ -0,0 +1,139 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MARKDUPLICATES + +process { + + withName: 'CRAM_TO_BAM' { + ext.args = { '-b' } + } + + withName: 'BAM_TO_CRAM' { + // BAM provided for step Markduplicates either run through MD or Convert -> then saved as sorted.cram (convert) or md.cram (md directly) + // BAM files provided for step prepare_recal are converted and run through BQSR -> then saved as md.cram + // BAM files provided for step recal are converted and run through BQSR II -> then saved as md.cram + ext.args = { '-C' } + ext.prefix = { "${meta.id}.converted" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/converted/${meta.id}" }, + pattern: "*{cram,crai}", + saveAs: { !params.save_output_as_bam ? 
it : null } + ] + } + + withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:(BAM_MARKDUPLICATES|BAM_MARKDUPLICATES_SPARK):CRAM_QC_MOSDEPTH_SAMTOOLS:SAMTOOLS_STATS' { + ext.prefix = { "${meta.id}.md.cram" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/samtools/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'BAM_TO_CRAM_MAPPING' { + ext.prefix = { "${meta.id}.sorted" } + // Run only when mapping should be saved as CRAM or when no MD is done + ext.when = (params.save_mapped && !params.save_output_as_bam) || + ( + (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) && + !(params.tools && params.tools.split(',').contains('sentieon_dedup')) + ) + publishDir = [ + // Never publish if BAM only should be published + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/mapped/${meta.id}/" }, + pattern: "*{cram,crai}", + saveAs: { !params.save_output_as_bam ? it : null } + ] + } + + withName: 'GATK4_ESTIMATELIBRARYCOMPLEXITY' { + ext.prefix = { "${meta.id}.md.cram" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('markduplicates_report')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/markduplicates/${meta.id}" }, + pattern: "*metrics" + ] + } + + withName: 'GATK4_MARKDUPLICATES' { + ext.args = { [ + "--REMOVE_DUPLICATES false", + "--VALIDATION_STRINGENCY LENIENT", + params.markduplicates_pixel_distance ? "--OPTICAL_DUPLICATE_PIXEL_DISTANCE ${params.markduplicates_pixel_distance}" : '', + params.umi_tag ? "--BARCODE_TAG ${params.umi_tag}" : + (params.umi_in_read_header || params.umi_length ? 
"--BARCODE_TAG RX" : "") + ].join(" ").trim() } + ext.prefix = { "${meta.id}.md.cram" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) } + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, + pattern: "*{cram,crai}", + saveAs: { !params.save_output_as_bam ? it : null } + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/" }, + pattern: "*metrics", + saveAs: { !(params.skip_tools && params.skip_tools.split(',').contains('markduplicates_report')) ? "markduplicates/${meta.id}/${it}" : null} + ] + ] + } + + withName: 'GATK4SPARK_MARKDUPLICATES' { + containerOptions = '' + ext.args = { '--remove-sequencing-duplicates false -VS LENIENT' } + ext.prefix = { "${meta.id}.md.cram" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, + pattern: "*{cram,crai}", + saveAs: { !params.save_output_as_bam ? it : null } + ] + } + + withName: 'INDEX_MARKDUPLICATES' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, + pattern: "*{cram,crai}", + saveAs: { !params.save_output_as_bam ? it : null } + ] + } + + withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:CRAM_TO_BAM' { + ext.when = { params.save_output_as_bam } + ext.prefix = { (params.tools && params.tools.split(',').contains('sentieon_dedup')) + ? "${meta.id}.dedup" + : "${meta.id}.md" } + publishDir = (params.tools && params.tools.split(',').contains('sentieon_dedup')) + ? [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/sentieon_dedup/${meta.id}/" }, + pattern: "*{dedup.bam,dedup.bam.bai}", + saveAs: { params.save_output_as_bam ? 
it : null } + ] + : [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, + pattern: "*{md.bam,md.bam.bai}", + saveAs: { params.save_output_as_bam ? it : null } + ] + } +} diff --git a/conf/modules/modules.config b/conf/modules/modules.config new file mode 100644 index 0000000000..9facd68650 --- /dev/null +++ b/conf/modules/modules.config @@ -0,0 +1,123 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +process { + // QC + withName: 'FASTQC' { + ext.args = { '--quiet' } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('fastqc')) } + publishDir = [ + [ + path: { "${params.outdir}/reports/fastqc/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*{html,zip}" + ] + ] + } + + withName: 'MULTIQC' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + } + + withName: 'NFCORE_SAREK:SAREK:CRAM_TO_BAM' { + publishDir = [ + path: { "${params.outdir}/preprocessing/converted/cram_to_bam/${meta.id}" }, + pattern: "*{bam,bam.bai}", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:CRAM_QC_NO_MD:SAMTOOLS_STATS' { + ext.prefix = { "${meta.id}.sorted.cram" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/samtools/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'NFCORE_SAREK:SAREK:SPRING_DECOMPRESS_.*' { + ext.prefix = { "${spring.simpleName}" } + publishDir = [ + enabled: false + ] + } + + withName: 'MOSDEPTH' { + ext.args = { !params.wes ? "-n --fast-mode --by 500" : ""} + ext.prefix = { + if (params.tools && params.tools.split(',').contains('sentieon_dedup')) { + "${meta.id}.dedup" + } else if (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) { + "${meta.id}.sorted" + } else { + "${meta.id}.md" + } + } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('mosdepth')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/mosdepth/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'NFCORE_SAREK:SAREK:CRAM_SAMPLEQC:CRAM_QC_RECAL:MOSDEPTH' { + ext.prefix = { "${meta.id}.recal" } + } + + withName: 'NFCORE_SAREK:SAREK:CRAM_SAMPLEQC:CRAM_QC_RECAL:SAMTOOLS_STATS' { + ext.prefix = { "${meta.id}.recal.cram" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/samtools/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + // VCF + withName: 'BCFTOOLS_STATS' { + ext.prefix = { vcf.baseName - ".vcf" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('bcftools')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/bcftools/${meta.variantcaller}/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'VCFTOOLS_.*' { + ext.prefix = { variant_file.baseName - ".vcf" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('vcftools')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/vcftools/${meta.variantcaller}/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'VCFTOOLS_TSTV_COUNT' { + ext.args = { '--TsTv-by-count' } + } + + withName: 'VCFTOOLS_TSTV_QUAL' { + ext.args = { '--TsTv-by-qual' } + } + + withName: 'VCFTOOLS_SUMMARY' { + ext.args = { '--FILTER-summary' } + } +} diff --git a/conf/modules/mpileup.config b/conf/modules/mpileup.config new file mode 100644 index 0000000000..edd2199354 --- /dev/null +++ b/conf/modules/mpileup.config @@ -0,0 +1,86 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
+---------------------------------------------------------------------------------------- +*/ + +// MPILEUP + +process { + + withName: 'CAT_MPILEUP' { + publishDir = [ + enabled: false + ] + } + + withName: 'BCFTOOLS_MPILEUP' { + ext.args2 = { '--multiallelic-caller' } + ext.args3 = { "-i 'count(GT==\"RR\")==0'" } // only report non homozygous reference variants + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.bcftools" : "${meta.id}_${intervals.baseName}.bcftools" } + ext.when = { params.tools && params.tools.split(',').contains('mpileup') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/bcftools/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? null : it } + ] + } + + withName: 'MERGE_BCFTOOLS_MPILEUP' { + ext.prefix = {"${meta.id}.bcftools"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/bcftools/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'SAMTOOLS_MPILEUP' { + ext.when = { params.tools && params.tools.split(',').contains('controlfreec') } + publishDir = [ + enabled: false + ] + + } + +// PAIR_VARIANT_CALLING + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_MPILEUP:SAMTOOLS_MPILEUP' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.normal" : "${meta.id}_${intervals.simpleName}.normal" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_MPILEUP:CAT_MPILEUP' { + ext.prefix = { "${meta.id}.normal.mpileup.gz" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_NORMAL:SAMTOOLS_MPILEUP' { + ext.prefix = { meta.num_intervals <= 1 ? 
"${meta.id}.normal" : "${meta.id}_${intervals.simpleName}.normal" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_NORMAL:CAT_MPILEUP' { + ext.prefix = { "${meta.id}.normal.mpileup.gz" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_TUMOR:SAMTOOLS_MPILEUP' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.tumor" : "${meta.id}_${intervals.simpleName}.tumor" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_TUMOR:CAT_MPILEUP' { + ext.prefix = { "${meta.id}.tumor.mpileup.gz" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_TUMOR_ONLY_ALL:BAM_VARIANT_CALLING_MPILEUP:SAMTOOLS_MPILEUP' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.tumor" : "${meta.id}_${intervals.simpleName}.tumor" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_TUMOR_ONLY_ALL:BAM_VARIANT_CALLING_MPILEUP:CAT_MPILEUP' { + ext.prefix = { "${meta.id}.tumor.mpileup.gz" } + } +} diff --git a/conf/modules/msisensor2.config b/conf/modules/msisensor2.config new file mode 100644 index 0000000000..84c0f342ba --- /dev/null +++ b/conf/modules/msisensor2.config @@ -0,0 +1,25 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MSISENSOR2 + +process { + withName: 'MSISENSOR2_MSI' { + ext.args = { params.wes ? 
"-c 20" : "-c 15"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/msisensor2/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/msisensorpro.config b/conf/modules/msisensorpro.config new file mode 100644 index 0000000000..411eee3742 --- /dev/null +++ b/conf/modules/msisensorpro.config @@ -0,0 +1,24 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MSISENSORPRO + +process { + withName: 'MSISENSORPRO_MSISOMATIC' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/msisensorpro/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/muse.config b/conf/modules/muse.config new file mode 100644 index 0000000000..bfe4b291be --- /dev/null +++ b/conf/modules/muse.config @@ -0,0 +1,54 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. 
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MUSE + +process { + + withName: 'MUSE_CALL' { + ext.args = { params.wes ? '-E' : '-G' } + ext.when = { params.tools && params.tools.split(',').contains('muse') } // exact match on the comma-separated tool list, not a substring test + ext.prefix = { "${meta.id}" } // .MuSE. is added by the tool by default + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/muse/${meta.id}/" }, + pattern: "*.txt", + ] + } + + withName: 'MUSE_SUMP' { + ext.args = { params.wes ? '-E' : '-G' } + ext.when = { params.tools && params.tools.split(',').contains('muse') } // exact match on the comma-separated tool list, not a substring test + ext.prefix = { "${meta.id}.muse" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/muse/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'CRAM_TO_BAM_TUMOR' { + ext.prefix = {"${meta.tumor_id}"} + } + + withName: 'CRAM_TO_BAM_NORMAL' { + ext.prefix = {"${meta.normal_id}"} + } + + withName: 'TABIX_MUSE' { + publishDir = [ + enabled: false + ] + } + +} diff --git a/conf/modules/mutect2.config b/conf/modules/mutect2.config new file mode 100644 index 0000000000..d12c476e17 --- /dev/null +++ b/conf/modules/mutect2.config @@ -0,0 +1,102 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). 
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MUTECT2 + +process { + + withName: 'GATK4_MUTECT2' { + ext.args = { params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz" : "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz" } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.baseName}" } + ext.when = { params.tools && params.tools.split(',').contains('mutect2') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi,stats}", + saveAs: { meta.num_intervals > 1 ? null : "mutect2/${meta.id}/${it}" } + ] + } + + // PAIR_VARIANT_CALLING + withName: 'MUTECT2_PAIRED' { + ext.args = { params.ignore_soft_clipped_bases ? + "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.patient}_${meta.normal_id}" : + "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.patient}_${meta.normal_id}" } + } + + withName: 'MERGE_MUTECT2.*' { + ext.prefix = { "${meta.id}.mutect2" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'FILTERMUTECTCALLS.*' { + ext.prefix = {"${meta.id}.mutect2.filtered"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'CALCULATECONTAMINATION' { + ext.args = { "-tumor-segmentation ${meta.id}.mutect2.segmentation.table" } + ext.prefix = { "${meta.id}.mutect2" } + publishDir = [ + mode: params.publish_dir_mode, + path: { params.joint_mutect2 ? "${params.outdir}/variant_calling/mutect2/${meta.patient}" : "${params.outdir}/variant_calling/mutect2/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'LEARNREADORIENTATIONMODEL' { + ext.prefix = { "${meta.id}.mutect2.artifactprior" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'MERGEMUTECTSTATS' { + ext.prefix = { "${meta.id}.mutect2" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'GATHERPILEUPSUMMARIES.*' { + ext.prefix = { "${meta.id}.mutect2" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'GETPILEUPSUMMARIES.*' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.baseName}" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*.table", + saveAs: { meta.num_intervals > 1 ? 
null : "mutect2/${meta.id}/${it}" } + ] + } + +} diff --git a/conf/modules/ngscheckmate.config b/conf/modules/ngscheckmate.config new file mode 100644 index 0000000000..4d35c94468 --- /dev/null +++ b/conf/modules/ngscheckmate.config @@ -0,0 +1,24 @@ +process { + + withName: '.*BAM_NGSCHECKMATE:BCFTOOLS_MPILEUP' { + ext.args2 = { '--no-version --ploidy 1 -c' } + ext.args3 = { '--no-version' } + ext.prefix = { "${meta.id}.ngscheckmate" } + ext.when = { params.tools && params.tools.split(',').contains('ngscheckmate') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/ngscheckmate/vcfs" }, + pattern: "*{vcf.gz}" + ] + } + + withName: '.*BAM_NGSCHECKMATE:NGSCHECKMATE_NCM' { + ext.args = { '-V' } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/ngscheckmate/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + +} diff --git a/conf/modules/post_variant_calling.config b/conf/modules/post_variant_calling.config new file mode 100644 index 0000000000..2eb80cd0dd --- /dev/null +++ b/conf/modules/post_variant_calling.config @@ -0,0 +1,101 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
+---------------------------------------------------------------------------------------- +*/ + +// POSTPROCESSING VCFS +// For instance, concatenating the unannotated germline VCF files + +process { + + withName: 'FILTER_VCFS' { + ext.args = { [ params.bcftools_filter_criteria, + "--output-type z --write-index=tbi" + ].join(" ").trim() } + ext.prefix = { vcf.baseName - '.vcf' + '.bcftools_filtered' } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/filtered/${meta.id}/" }, + pattern: "*{.tbi,.vcf.gz}" + ] + } + + withName: 'ADD_INFO_TO_VCF' { + publishDir = [ enabled: false ] + } + + withName: 'TABIX_EXT_VCF' { + ext.prefix = { "${input.baseName}" } + publishDir = [ enabled: false ] + } + + withName: 'VCFS_NORM' { + ext.prefix = { vcf.baseName - '.added_info.vcf' + '.norm' } + ext.args = { [ + '--multiallelics -both', // split multiallelic sites into biallelic records; SNPs and indels are merged separately into two records + '--rm-dup all' // output only the first instance of a record that is present multiple times + ].join(' ') } + publishDir = [ + enabled: false + ] + } + + withName: 'VCFS_NORM_SORT' { + ext.prefix = { vcf.baseName - '.vcf' + '.sorted' } + ext.args = "--output-type z --write-index=tbi" + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/normalized/${meta.id}/" }, + pattern: "*{vcf.gz,tbi}" + ] + } + + withName: 'NFCORE_SAREK:SAREK:POST_VARIANTCALLING:CONSENSUS:BCFTOOLS_CONCAT' { + ext.args = { "--output-type z --write-index=tbi --allow-overlaps" } + ext.prefix = { "${meta.id}.strelka" } + publishDir = [ enabled: false ] + } + + withName: 'BCFTOOLS_ISEC' { + // Scenarios we can't support at the moment include exact-match selections such as -n ~1100 (present in the first two but not the last two files) + ext.args = { "-n+${params.consensus_min_count} --output-type z --write-index=tbi"} + ext.prefix = { "${meta.id}_consensus" } + publishDir = [ + mode: 
params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/consensus/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'CONSENSUS_FROM_SITES' { + ext.prefix = { "${meta.id}.consensus" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/consensus/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'GERMLINE_VCFS_CONCAT' { + ext.args = { "-a --output-type z" } + publishDir = [ enabled: false ] + } + + withName: 'GERMLINE_VCFS_CONCAT_SORT' { + ext.prefix = { "${meta.id}.germline" } + ext.args = "--output-type z --write-index=tbi" + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/concat/${meta.id}/" }, + pattern: "*{vcf.gz,tbi}" + ] + } +} diff --git a/conf/modules/prepare_genome.config b/conf/modules/prepare_genome.config new file mode 100644 index 0000000000..5a84ea0a1f --- /dev/null +++ b/conf/modules/prepare_genome.config @@ -0,0 +1,189 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +// PREPARE_GENOME && PREPARE_REFERENCE_CNVKIT +process { + + withName: 'BBMAP_INDEX' { + ext.args = 'build=1' + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + saveAs: { params.save_reference || params.build_only_index ? 
it : null } + ] + } + + withName: 'BWAMEM1_INDEX' { + ext.prefix = { params.aligner == "parabricks" ? "${fasta.name}" : "${fasta.baseName}" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + pattern: "bwa", + saveAs: { params.save_reference || params.build_only_index ? it : null } + ] + } + + withName: 'BWAMEM2_INDEX' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + pattern: "bwamem2", + saveAs: { params.save_reference || params.build_only_index ? it : null } + ] + } + + withName: 'CNVKIT_ANTITARGET' { + ext.when = { params.tools && params.tools.split(',').contains('cnvkit') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + pattern: "*{bed}", + saveAs: { params.save_reference || params.build_only_index ? "cnvkit/${it}" : null } + ] + } + + withName: 'CNVKIT_REFERENCE' { + ext.prefix = { 'cnvkit' } + ext.when = { params.tools && params.tools.split(',').contains('cnvkit') && !params.cnvkit_reference } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + pattern: "*{cnn}", + saveAs: { params.save_reference || params.build_only_index ? "cnvkit/${it}" : null } + ] + } + + withName: 'DRAGMAP_HASHTABLE' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + pattern: "dragmap", + saveAs: { params.save_reference || params.build_only_index ? it : null } + ] + } + + withName: 'GATK4_CREATESEQUENCEDICTIONARY' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/dict" }, + pattern: "*dict", + saveAs: { params.save_reference || params.build_only_index ? it : null } + ] + } + + withName: 'MSISENSOR2_SCAN' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/msisensor2" }, + pattern: "*scan", + saveAs: { params.save_reference || params.build_only_index ? 
it : null } + ] + } + + withName: 'MSISENSORPRO_SCAN' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/msisensorpro" }, + pattern: "*list", + saveAs: { params.save_reference || params.build_only_index ? it : null } + ] + } + + withName: 'SAMTOOLS_FAIDX' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/fai" }, + pattern: "*fai", + saveAs: { params.save_reference || params.build_only_index ? it : null } + ] + } + + withName: 'TABIX_BCFTOOLS_ANNOTATIONS' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/bcfann" }, + pattern: "*vcf.gz.tbi", + saveAs: { params.save_reference || params.build_only_index ? it : null } + ] + } + + withName: 'TABIX_DBSNP' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/dbsnp" }, + pattern: "*vcf.gz.tbi", + saveAs: { params.save_reference || params.build_only_index ? it : null } + ] + } + + withName: 'TABIX_GERMLINE_RESOURCE' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/germline_resource" }, + pattern: "*vcf.gz.tbi", + saveAs: { params.save_reference || params.build_only_index ? it : null } + ] + } + + withName: 'TABIX_KNOWN_INDELS' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/known_indels" }, + pattern: "*vcf.gz.tbi", + saveAs: { params.save_reference || params.build_only_index ? it : null } + ] + } + + withName: 'TABIX_KNOWN_SNPS' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/known_snps" }, + pattern: "*vcf.gz.tbi", + saveAs: { params.save_reference || params.build_only_index ? it : null } + ] + } + + withName: 'TABIX_PON' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/pon" }, + pattern: "*vcf.gz.tbi", + saveAs: { params.save_reference || params.build_only_index ? 
it : null } + ] + } + + withName: 'UNZIP_ALLELES|UNZIP_LOCI|UNZIP_GC|UNZIP_RT' { + publishDir = [ + enabled: false + ] + } + + withName: 'UNTAR_CHR_DIR' { + ext.prefix = 'chr_dir' + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/" }, + saveAs: { (params.save_reference || params.build_only_index) && !it.equals('versions.yml') ? it : null } + ] + } + + withName: 'UNTAR_MSISENSOR2_MODELS' { + ext.prefix = 'models' + publishDir = [ + enabled: true, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/msisensor2" }, + pattern: "models", + saveAs: { params.save_reference || params.build_only_index ? it : null } + ] + } +} diff --git a/conf/modules/prepare_intervals.config b/conf/modules/prepare_intervals.config new file mode 100644 index 0000000000..815903b996 --- /dev/null +++ b/conf/modules/prepare_intervals.config @@ -0,0 +1,50 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
+---------------------------------------------------------------------------------------- +*/ + +// PREPARE INTERVALS: module options and publishing rules for the interval (BED) modules + +process { + + withName: 'BUILD_INTERVALS' { + ext.args = { "-v FS='\t' -v OFS='\t' '{ print \$1, \"0\", \$2 }'" } + ext.suffix = { "bed" } + } + + withName: 'CREATE_INTERVALS_BED' { + publishDir = [ + path: { "${params.outdir}/reference" }, + pattern: "*bed", + mode: params.publish_dir_mode, + saveAs: { filename -> (params.save_reference || params.build_only_index) ? "intervals/${filename}" : null } + ] + } + + withName: 'GATK4_INTERVALLISTTOBED' { + publishDir = [ + path: { "${params.outdir}/reference" }, + pattern: "*bed", + mode: params.publish_dir_mode, + saveAs: { filename -> (params.save_reference || params.build_only_index) ? "intervals/${filename}" : null } + ] + } + + withName: 'TABIX_BGZIPTABIX_INTERVAL_SPLIT|TABIX_BGZIPTABIX_INTERVAL_COMBINED' { + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/reference" }, + pattern: "*bed.gz", + mode: params.publish_dir_mode, + saveAs: { filename -> (params.save_reference || params.build_only_index) ? "intervals/${filename}" : null } + ] + } +} diff --git a/conf/modules/prepare_recalibration.config b/conf/modules/prepare_recalibration.config new file mode 100644 index 0000000000..d9c74fe044 --- /dev/null +++ b/conf/modules/prepare_recalibration.config @@ -0,0 +1,38 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
+---------------------------------------------------------------------------------------- +*/ + +// PREPARE_RECALIBRATION + +process { + + withName: 'GATK4_BASERECALIBRATOR|GATK4SPARK_BASERECALIBRATOR' { + containerOptions = params.use_gatk_spark ? '' : null + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*table", + saveAs: { meta.num_intervals > 1 ? null : "recal_table/${meta.id}/${it}" } + ] + } + + withName: 'GATK4_GATHERBQSRREPORTS' { + ext.prefix = {"${meta.id}.recal"} + ext.when = { meta.num_intervals > 1 } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/recal_table/${meta.id}/" }, + pattern: "*table", + ] + } +} diff --git a/conf/modules/recalibrate.config b/conf/modules/recalibrate.config new file mode 100644 index 0000000000..0869857522 --- /dev/null +++ b/conf/modules/recalibrate.config @@ -0,0 +1,74 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// RECALIBRATE + +process { + + withName: 'GATK4_APPLYBQSR|GATK4SPARK_APPLYBQSR' { + containerOptions = params.use_gatk_spark ? '' : null + ext.prefix = { + meta.num_intervals <= 1 ? 
"${meta.id}.recal" : "${meta.id}_${intervals.baseName}.recal" + } + ext.suffix = { params.save_output_as_bam ? 'bam' : 'cram'} + publishDir = [[ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*recal.cram", + saveAs: { !params.save_output_as_bam ? meta.num_intervals > 1 ? null : "recalibrated/${meta.id}/${it}" : null } + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*recal.bam", + saveAs: { params.save_output_as_bam ? meta.num_intervals > 1 ? null : "recalibrated/${meta.id}/${it}" : null } + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*recal.bai", + saveAs: { params.save_output_as_bam ? meta.num_intervals > 1 ? null : "recalibrated/${meta.id}/${it.replace('.bai', '.bam.bai')}" : null } + ]] + } + + withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:(BAM_APPLYBQSR|BAM_APPLYBQSR_SPARK):CRAM_MERGE_INDEX_SAMTOOLS:MERGE_CRAM' { + ext.prefix = { "${meta.id}.recal" } + ext.when = { meta.num_intervals > 1 } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" }, + pattern: "*cram", + saveAs: { !params.save_output_as_bam ? it : null } + ] + } + + withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:(BAM_APPLYBQSR|BAM_APPLYBQSR_SPARK):CRAM_MERGE_INDEX_SAMTOOLS:INDEX_CRAM' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" }, + pattern: "*{recal.cram,recal.cram.crai}", + saveAs: { !params.save_output_as_bam ? it : null } + ] + } + + withName: 'CRAM_TO_BAM_RECAL' { + ext.prefix = { "${meta.id}.recal" } + ext.when = { params.save_output_as_bam} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" }, + pattern: "*{recal.bam,recal.bam.bai}", + saveAs: { params.save_output_as_bam ? 
it : null } + ] + } +} diff --git a/conf/modules/sentieon_dedup.config b/conf/modules/sentieon_dedup.config new file mode 100644 index 0000000000..b01e323643 --- /dev/null +++ b/conf/modules/sentieon_dedup.config @@ -0,0 +1,53 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// SENTIEON_DEDUP + +process { + + withName: 'SENTIEON_DEDUP' { + ext.prefix = { params.sentieon_consensus ? "${meta.id}.consensus.cram" : "${meta.id}.dedup.cram" } + ext.when = { params.tools && params.tools.split(',').contains('sentieon_dedup') } + ext.args2 = [ + params.umi_tag ? "--umi_tag ${params.umi_tag}" : // pre-calculated UMI tag + (params.umi_in_read_header || params.umi_length ? "--umi_tag RX" : ""), // UMI based deduplication using RX tag from fgbio/copyumifromreadname + params.sentieon_consensus ? "--consensus" : "" // Generate consensus reads instead of just deduplication + ].join(' ').trim() + + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { params.sentieon_consensus ? "${params.outdir}/preprocessing/sentieon_consensus/${meta.id}/" : "${params.outdir}/preprocessing/sentieon_dedup/${meta.id}/" }, + pattern: "*{cram,crai}", + saveAs: { !params.save_output_as_bam ? 
it : null } + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/" }, + pattern: "*{metrics,metrics.multiqc.tsv}", + saveAs: { !(params.skip_tools && params.skip_tools.split(',').contains('sentieon_dedup_report')) ? "sentieon_dedup/${meta.id}/${it}" : null} + ] + ] + } + + withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:BAM_SENTIEON_DEDUP:CRAM_QC_MOSDEPTH_SAMTOOLS:SAMTOOLS_STATS' { + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } + ext.prefix = { "${meta.id}.dedup.cram" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/samtools/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + +} diff --git a/conf/modules/sentieon_dnascope.config b/conf/modules/sentieon_dnascope.config new file mode 100644 index 0000000000..50cf373ea8 --- /dev/null +++ b/conf/modules/sentieon_dnascope.config @@ -0,0 +1,56 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// SENTIEON DNASCOPE + +process { + + withName: 'SENTIEON_DNASCOPE' { + ext.prefix = { meta.num_intervals <= 1 ? 
"${meta.id}.dnascope" : "${meta.id}.dnascope.${intervals.baseName}" } + ext.when = { params.tools && params.tools.split(',').contains('sentieon_dnascope') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/"}, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? null : "sentieon_dnascope/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_SENTIEON_DNASCOPE_VCFS' { + ext.prefix = { params.joint_germline ? "${meta.id}.dnascope.g" : "${meta.id}.dnascope.unfiltered" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/sentieon_dnascope/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'MERGE_SENTIEON_DNASCOPE_GVCFS' { + ext.prefix = { "${meta.id}.dnascope.g" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/sentieon_dnascope/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'SENTIEON_DNAMODELAPPLY' { + ext.prefix = { "${meta.id}.dnascope.filtered" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/sentieon_dnascope/${meta.id}/"}, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + +} diff --git a/conf/modules/sentieon_haplotyper.config b/conf/modules/sentieon_haplotyper.config new file mode 100644 index 0000000000..e3ce58194a --- /dev/null +++ b/conf/modules/sentieon_haplotyper.config @@ -0,0 +1,57 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). 
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// SENTIEON HAPLOTYPER + +process { + + withName: 'SENTIEON_HAPLOTYPER' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.haplotyper" : "${meta.id}.haplotyper.${intervals.baseName}" } + ext.when = { params.tools && params.tools.split(',').contains('sentieon_haplotyper') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/"}, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? null : "sentieon_haplotyper/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_SENTIEON_HAPLOTYPER_VCFS' { + ext.prefix = { params.joint_germline ? "${meta.id}.haplotyper.g" : "${meta.id}.haplotyper.unfiltered" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/sentieon_haplotyper/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'MERGE_SENTIEON_HAPLOTYPER_GVCFS' { + ext.prefix = { "${meta.id}.haplotyper.g" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/sentieon_haplotyper/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: '.*:SENTIEON_HAPLOTYPER_VCF_VARIANT_FILTERING_GATK:FILTERVARIANTTRANCHES' { + ext.args = { "--info-key CNN_1D" } + ext.prefix = { "${meta.id}.haplotyper" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/sentieon_haplotyper/${meta.id}/"}, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + +} diff --git a/conf/modules/sentieon_joint_germline.config b/conf/modules/sentieon_joint_germline.config new file mode 100644 index 0000000000..c423d98cc1 --- /dev/null +++ b/conf/modules/sentieon_joint_germline.config @@ -0,0 +1,77 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
+---------------------------------------------------------------------------------------- +*/ + +// SENTIEON JOINT_GERMLINE + +process { + + withName: 'SENTIEON_GVCFTYPER' { + ext.args = { '--allow-old-rms-mapping-quality-annotation-data' } + ext.prefix = { meta.intervals_name } + publishDir = [ + enabled: false + ] + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:BCFTOOLS_SORT' { + ext.prefix = { vcf.baseName - ".vcf" + ".sort" } + publishDir = [ + enabled: false + ] + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:MERGE_GENOTYPEGVCFS' { + ext.prefix = { 'joint_germline' } + publishDir = [ + mode: params.publish_dir_mode, + path: { params.tools && params.tools.split(',').contains('sentieon_dnascope') && params.joint_germline ? "${params.outdir}/variant_calling/sentieon_dnascope/joint_variant_calling/" : "${params.outdir}/variant_calling/sentieon_haplotyper/joint_variant_calling/" }, // exact tool-name match on the comma-separated params.tools list (same check as the other module configs); falls back to the haplotyper directory + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'SENTIEON_VARCAL_INDEL' { + ext.args = { '--annotation QD --annotation MQRankSum --annotation ReadPosRankSum --annotation FS --annotation SOR --annotation DP --var_type INDEL' } + ext.prefix = { "${meta.id}_INDEL" } + publishDir = [ + enabled: false + ] + } + + withName: 'SENTIEON_APPLYVARCAL_INDEL' { + ext.args2 = { '--sensitivity 99.9 --var_type INDEL' } + ext.prefix = { 'joint_germline_recalibrated_indel' } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/sentieon_haplotyper/joint_variant_calling/"}, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'SENTIEON_VARCAL_SNP' { + ext.args = { '--annotation QD --annotation MQRankSum --annotation ReadPosRankSum --annotation FS --annotation SOR --annotation DP --var_type SNP' } + ext.prefix = { "${meta.id}_SNP" } + publishDir = [ + enabled: false + ] + } + + withName: 'SENTIEON_APPLYVARCAL_SNP' { + ext.args2 = { '--sensitivity 99.9 
--var_type SNP' } + ext.prefix = { "${meta.id}_SNP" } + // don't publish, as the output is put through SENTIEON_APPLYVARCAL_INDEL afterwards for a second round + publishDir = [ + enabled: false + ] + } + +} diff --git a/conf/modules/sentieon_tnscope.config b/conf/modules/sentieon_tnscope.config new file mode 100644 index 0000000000..6c9ded0a80 --- /dev/null +++ b/conf/modules/sentieon_tnscope.config @@ -0,0 +1,41 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// SENTIEON TNSCOPE + +process { + + withName: 'SENTIEON_TNSCOPE' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.tnscope" : "${meta.id}.tnscope.${intervals.baseName}" } + ext.args2 = { meta.normal_id ? + "--tumor_sample ${meta.patient}_${meta.tumor_id} --normal_sample ${meta.patient}_${meta.normal_id}" : + "--tumor_sample ${meta.patient}_${meta.sample}" } + ext.when = { params.tools && params.tools.split(',').contains('sentieon_tnscope') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/"}, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? 
null : "sentieon_tnscope/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_TNSCOPE.*' { + ext.prefix = { "${meta.id}.tnscope" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/sentieon_tnscope/${meta.id}" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + +} diff --git a/conf/modules/strelka.config b/conf/modules/strelka.config new file mode 100644 index 0000000000..badffb5df8 --- /dev/null +++ b/conf/modules/strelka.config @@ -0,0 +1,53 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// STRELKA + +process { + + withName: 'STRELKA_.*' { + ext.args = { params.wes ? '--exome' : '' } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.strelka" : "${meta.id}.strelka.${target_bed.baseName}" } + ext.when = { params.tools && params.tools.split(',').contains('strelka') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? 
null : "strelka/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_STRELKA.*' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/strelka/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'MERGE_STRELKA' { + ext.prefix = { "${meta.id}.strelka.variants" } + } + + withName: 'MERGE_STRELKA_GENOME' { + ext.prefix = { "${meta.id}.strelka.genome" } + } + + // PAIR_VARIANT_CALLING + withName: 'MERGE_STRELKA_INDELS' { + ext.prefix = { "${meta.id}.strelka.somatic_indels" } + } + withName: 'MERGE_STRELKA_SNVS' { + ext.prefix = { "${meta.id}.strelka.somatic_snvs" } + } +} diff --git a/conf/modules/tiddit.config b/conf/modules/tiddit.config new file mode 100644 index 0000000000..cb0e7d400a --- /dev/null +++ b/conf/modules/tiddit.config @@ -0,0 +1,57 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// TIDDIT + +process { + + withName: 'TIDDIT_SV' { + ext.args = { bwa_index ? 
'' : '--skip_assembly' } + ext.prefix = { "${meta.id}.tiddit" } + ext.when = { params.tools && params.tools.split(',').contains('tiddit') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/tiddit/${meta.id}/" }, + pattern: "*tab" + ] + } + + withName : 'TABIX_BGZIP_TIDDIT_SV' { + ext.prefix = { "${meta.id}.tiddit" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/tiddit/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + // PAIR_VARIANT_CALLING + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_SOMATIC_TIDDIT:TIDDIT_NORMAL:TABIX_BGZIP_TIDDIT_SV' { + ext.prefix = {"${meta.id}.tiddit.normal"} + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_SOMATIC_TIDDIT:TIDDIT_TUMOR:TABIX_BGZIP_TIDDIT_SV' { + ext.prefix = {"${meta.id}.tiddit.tumor"} + } + + // SVDB + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_SOMATIC_TIDDIT:SVDB_MERGE' { + ext.args2 = { '--output-type z' } + ext.prefix = { "${meta.id}.tiddit_sv_merge" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/tiddit/${meta.id}/" }, + pattern: "*vcf.gz" + ] + } +} diff --git a/conf/modules/trimming.config b/conf/modules/trimming.config new file mode 100644 index 0000000000..0dea7a78f7 --- /dev/null +++ b/conf/modules/trimming.config @@ -0,0 +1,46 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). 
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// TRIMMING + +process { + + withName: 'FASTP' { + ext.args = [ + !params.trim_fastq ? '--disable_adapter_trimming' : '', // Disable adapter trimming unless params.trim_fastq is set + params.clip_r1 > 0 ? "--trim_front1 ${params.clip_r1}" : '', // Remove bp from the 5' end of read 1 + params.clip_r2 > 0 ? "--trim_front2 ${params.clip_r2}" : '', // Remove bp from the 5' end of read 2 + params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : '', // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed + params.three_prime_clip_r2 > 0 ? "--trim_tail2 ${params.three_prime_clip_r2}" : '', // Remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed + params.trim_nextseq ? '--trim_poly_g' : '', // Force poly-G tail trimming (fastp --trim_poly_g) when params.trim_nextseq is set + params.split_fastq > 0 ? "--split_by_lines ${params.split_fastq * 4}" : '', // Split output into files of at most this many lines (split_fastq * 4; presumably split_fastq counts reads at 4 lines each - confirm) + params.length_required > 0 ? "--length_required ${params.length_required}": '', // Reads shorter than this length are discarded + params.umi_location ? "-U --umi_loc ${params.umi_location}" : '', // Location of the UMI(s) + params.umi_length ? "--umi_len ${params.umi_length}" : '', // Length of the UMI(s) + params.umi_base_skip ? "--umi_skip ${params.umi_base_skip}" : '', // Number of bases to ignore after the UMI (e.g. 
constant bases present after the UMI) + ].join(' ').trim() + publishDir = [ + [ + path: { "${params.outdir}/reports/fastp/${meta.sample}" }, + mode: params.publish_dir_mode, + pattern: "*.{html,json,log}" + ], + [ + path: { "${params.outdir}/preprocessing/fastp/${meta.sample}" }, + mode: params.publish_dir_mode, + pattern: "*.fastp.fastq.gz", + enabled: params.save_trimmed || params.save_split_fastqs + ] + ] + } +} diff --git a/conf/modules/umi.config b/conf/modules/umi.config new file mode 100644 index 0000000000..3ca470185d --- /dev/null +++ b/conf/modules/umi.config @@ -0,0 +1,91 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// UMI + +process { + + withName: 'FASTQTOBAM' { + // if UMIs are in read header, use these and ignore umi_read_structure + ext.args = { params.umi_in_read_header ? 
'--extract-umis-from-read-names' : "--read-structures $params.umi_read_structure" } + ext.prefix = {"${meta.id}"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'BAM2FASTQ' { + ext.args = '-T RX' + ext.when = { params.umi_read_structure } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:FASTQ_CREATE_UMI_CONSENSUS_FGBIO:ALIGN_UMI:BWAMEM.*_MEM' { + ext.args = { "-K 100000000 -p -C -Y -R ${meta.read_group}" } + ext.args2 = { '-bS' } + ext.prefix = { "${meta.id}.umi_unsorted" } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:FASTQ_CREATE_UMI_CONSENSUS_FGBIO:ALIGN_UMI:DRAGMAP_ALIGN' { + ext.args2 = { '-bS' } + ext.prefix = { "${meta.id}.umi_unsorted" } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'MERGE_CONSENSUS' { + ext.args = '--template-coordinate' + ext.prefix = { "${meta.id}_unsorted_tagged" } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'GROUPREADSBYUMI' { + publishDir = [ + [ path: { "${params.outdir}/reports/umi/" }, + mode: params.publish_dir_mode, + pattern: "*.{txt}" + ] + ] + } + + withName: 'CALLUMICONSENSUS' { + ext.args = { '-S Coordinate' } + ext.prefix = { "${meta.id}_umi-consensus" } + publishDir = [ + path: { "${params.outdir}/preprocessing/umi/${meta.sample}" }, + mode: params.publish_dir_mode, + pattern: "*.{bam}" + ] + } + + withName: 'FGBIO_COPYUMIFROMREADNAME' { + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + +} diff --git a/conf/modules/varlociraptor.config b/conf/modules/varlociraptor.config new file mode 100644 index 0000000000..029fb40fde --- /dev/null +++ b/conf/modules/varlociraptor.config @@ 
-0,0 +1,207 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// VARLOCIRAPTOR +// Everything related to varlociraptor which is chained after the variant calling step. + +process { + + // VARLOCIRAPTOR ALL + withName: FILL_SCENARIO_FILE { + ext.prefix = { "${meta.id}.scenario.varlociraptor" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/varlociraptor/${meta.id}/" }, + ] + } + + // + // VCF_VARLOCIRAPTOR_GERMLINE and VCF_VARLOCIRAPTOR_TUMOR_ONLY MODULES + // + withName: VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES { + ext.prefix = { "${meta.id}" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/varlociraptor/${meta.id}/" }, + ] + } + + withName: '.*:VCF_VARLOCIRAPTOR_GERMLINE:RBT_VCFSPLIT|.*:VCF_VARLOCIRAPTOR_TUMOR_ONLY:RBT_VCFSPLIT' { + ext.prefix = { "${meta.id}.${meta.variantcaller}.split" } + publishDir = [ + enabled: false + ] + } + + withName: VARLOCIRAPTOR_PREPROCESS { + ext.prefix = { "${meta.id}.${meta.variantcaller}.preprocess.${meta.chunk}" } + publishDir = [ + enabled: false + ] + } + + withName: '.*:VCF_VARLOCIRAPTOR_GERMLINE:VARLOCIRAPTOR_CALLVARIANTS|.*:VCF_VARLOCIRAPTOR_TUMOR_ONLY:VARLOCIRAPTOR_CALLVARIANTS' { + ext.prefix = { "${meta.id}.${meta.variantcaller}.call.${meta.chunk}" } + 
publishDir = [ + enabled: false + ] + } + + withName: SORT_CALLED_CHUNKS { + ext.args = { '--output-type z --write-index=tbi' } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + saveAs: { file -> params.varlociraptor_chunk_size > 1 ? null : "varlociraptor/${meta.id}/${file}" }, + ] + } + + withName: '.*:VCF_VARLOCIRAPTOR_GERMLINE:SORT_CALLED_CHUNKS' { + ext.prefix = { params.varlociraptor_chunk_size > 1 ? "${meta.id}.${meta.variantcaller}.sort.${meta.chunk}" : "${meta.id}.${meta.variantcaller}.germline.varlociraptor" } + } + + withName: '.*:VCF_VARLOCIRAPTOR_TUMOR_ONLY:SORT_CALLED_CHUNKS' { + ext.prefix = { params.varlociraptor_chunk_size > 1 ? "${meta.id}.${meta.variantcaller}.sort.${meta.chunk}" : "${meta.id}.${meta.variantcaller}.tumor_only.varlociraptor" } + } + + withName: '.*:VCF_VARLOCIRAPTOR_GERMLINE:CONCAT_CALLED_CHUNKS' { + ext.prefix = { "${meta.id}.${meta.variantcaller}.germline.varlociraptor.concat" } + ext.args = { '--allow-overlaps --output-type z' } + publishDir = [ + enabled: false + ] + } + + withName: '.*:VCF_VARLOCIRAPTOR_GERMLINE:SORT_FINAL_VCF' { + ext.prefix = { "${meta.id}.${meta.variantcaller}.germline.varlociraptor" } + ext.args = { '--output-type z --write-index=tbi' } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/varlociraptor/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } + + withName: '.*:VCF_VARLOCIRAPTOR_TUMOR_ONLY:CONCAT_CALLED_CHUNKS' { + ext.prefix = { "${meta.id}.${meta.variantcaller}.tumor_only.varlociraptor.concat" } + ext.args = { '--allow-overlaps --output-type z' } + publishDir = [ + enabled: false + ] + } + + withName: '.*:VCF_VARLOCIRAPTOR_TUMOR_ONLY:SORT_FINAL_VCF' { + ext.prefix = { "${meta.id}.${meta.variantcaller}.tumor_only.varlociraptor" } + ext.args = { '--output-type z --write-index=tbi' } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/varlociraptor/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + // + // VCF_VARLOCIRAPTOR_SOMATIC MODULES + // + withName: '.*:VCF_VARLOCIRAPTOR_SOMATIC:ALIGNMENTPROPERTIES_NORMAL' { + ext.prefix = { "${meta.normal_id}.normal" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/varlociraptor/${meta.id}/" }, + ] + } + + withName: '.*:VCF_VARLOCIRAPTOR_SOMATIC:TABIX_GERMLINE|.*:VCF_VARLOCIRAPTOR_SOMATIC:TABIX_SOMATIC' { + publishDir = [ + enabled: false + ] + } + + withName: '.*:VCF_VARLOCIRAPTOR_SOMATIC:CONCAT_SOMATIC_STRELKA' { + ext.prefix = { "${meta.id}.strelka" } + ext.args = { '--allow-overlaps --output-type z --write-index=tbi' } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/strelka/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: '.*:VCF_VARLOCIRAPTOR_SOMATIC:MERGE_GERMLINE_SOMATIC_VCFS' { + ext.prefix = { "${meta.id}.${meta.variantcaller}.merged" } + ext.args = { '--output-type z --force-samples' } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/varlociraptor/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } + + withName: '.*:VCF_VARLOCIRAPTOR_SOMATIC:RBT_VCFSPLIT' { + ext.prefix = { "${meta.id}.${meta.variantcaller}.split" } + publishDir = [ + enabled: false + ] + } + + withName: '.*:VCF_VARLOCIRAPTOR_SOMATIC:PREPROCESS_NORMAL' { + ext.prefix = { "${meta.normal_id}.${meta.variantcaller}.preprocess.${meta.chunk}" } + publishDir = [ + enabled: false + ] + } + + withName: '.*:VCF_VARLOCIRAPTOR_SOMATIC:ALIGNMENTPROPERTIES_TUMOR' { + ext.prefix = { "${meta.tumor_id}.tumor" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/varlociraptor/${meta.id}/" }, + ] + } + + withName: '.*:VCF_VARLOCIRAPTOR_SOMATIC:PREPROCESS_TUMOR' { + ext.prefix = { "${meta.tumor_id}.${meta.variantcaller}.preprocess.${meta.chunk}" } + publishDir = [ + enabled: false + ] + } + + withName: '.*:VCF_VARLOCIRAPTOR_SOMATIC:VARLOCIRAPTOR_CALLVARIANTS' { + ext.prefix = { "${meta.id}.${meta.variantcaller}.call.${meta.chunk}" } + publishDir = [ + enabled: false + ] + } + + withName: '.*:VCF_VARLOCIRAPTOR_SOMATIC:SORT_CALLED_CHUNKS' { + ext.prefix = { params.varlociraptor_chunk_size > 1 ? "${meta.id}.${meta.variantcaller}.sort.${meta.chunk}" : "${meta.id}.${meta.variantcaller}.somatic.varlociraptor" } + } + + withName: '.*:VCF_VARLOCIRAPTOR_SOMATIC:CONCAT_CALLED_CHUNKS' { + ext.prefix = { "${meta.id}.${meta.variantcaller}.somatic.varlociraptor.concat" } + ext.args = { '--allow-overlaps --output-type z' } + publishDir = [ + enabled: false + ] + } + + withName: '.*:VCF_VARLOCIRAPTOR_SOMATIC:SORT_FINAL_VCF' { + ext.prefix = { "${meta.id}.${meta.variantcaller}.somatic.varlociraptor" } + ext.args = { '--output-type z --write-index=tbi' } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/varlociraptor/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } +} diff --git a/conf/test.config b/conf/test.config index 10e420284a..beffbc64bc 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,116 +1,92 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Nextflow config file for running minimal tests -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines input files and everything required to run a fast and simple pipeline test. Use as follows: - nextflow run nf-core/sarek -profile test,, - ----------------------------------------------------------------------------------------- + nextflow run nf-core/sarek -profile test,, --outdir +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params { +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '2.h' + ] + withName:BWAMEM2_INDEX { + memory = { 6.GB } + } +} +params { config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '8.h' + // Base directory for nf-core/modules test data + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' // Input data - input = "${baseDir}/tests/csv/3.0/fastq_single.csv" + input = "${projectDir}/tests/csv/3.0/fastq_single.csv" - // Small reference genome - igenomes_ignore = true - genome = 'small_hg38' - genomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules' + // small genome on igenomes + igenomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + genome = 'testdata.nf-core.sarek' - dbsnp = 
"${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" - fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.fasta" - germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz" - intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/genome.interval_list" - known_indels = "${params.genomes_base}/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz" - nucleotides_per_second = 20 + // Small reference genome + bcftools_annotations = "${params.modules_testdata_base_path}/genomics/sarscov2/illumina/vcf/test2.vcf.gz" + bcftools_annotations_tbi = "${params.modules_testdata_base_path}/genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi" + bcftools_header_lines = "${projectDir}/tests/config/bcfann_test_header.txt" - snpeff_db = 'WBcel235.99' - vep_species = 'caenorhabditis_elegans' - vep_cache_version = '104' + // Sentieon + sentieon_dnascope_model = "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38/Annotation/Sentieon/SentieonDNAscopeModel1.1.model" - // Ignore `--input` as otherwise the parameter validation will throw an error - schema_ignore_params = 'genomes,input' + // default params + split_fastq = 0 // no FASTQ splitting + tools = 'strelka' // Variant calling with Strelka } -profiles { - annotation { - params.genome = 'WBcel235' - params.igenomes_ignore = false - params.input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/tiny-vcf-https.csv' - params.step = 'annotate' - } - pair { - params.input = "${baseDir}/tests/csv/3.0/fastq_pair.csv" - } - prepare_recalibration { - params.input = "${baseDir}/tests/csv/3.0/mapped_single.csv" - params.step = 'prepare_recalibration' - } - save_bam_mapped { - params.save_bam_mapped = true - } - skip_markduplicates { - params.skip_markduplicates = true - } - split_fastq { - params.split_fastq = 150000 - params.save_split_fastqs = true - } - no_intervals { - params.no_intervals = 
true - } - targeted { - params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/multi_intervals.bed" - params.wes = true - } - tools { - params.input = "${baseDir}/tests/csv/3.0/recalibrated.csv" - params.dbsnp = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz" - params.fasta = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" - params.germline_resource = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz" - params.intervals = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" - params.pon = "${params.genomes_base}/data/genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz" - params.step = 'variant_calling' - params.tools = 'deepvariant,freebayes,haplotypecaller,manta,msisensorpro,mutect2,strelka,snpeff,vep' //tiddit - params.joint_germline = true - params.wes = true - params.genome = 'WBcel235' - params.vep_genome = 'WBcel235' - //params.vep_cache = - } - trimming { - params.clip_r1 = 1 - params.clip_r2 = 1 - params.three_prime_clip_r1 = 1 - params.three_prime_clip_r2 = 1 - params.trim_fastq = true - } - use_gatk_spark { - params.use_gatk_spark = 'bqsr,markduplicates' - } - umi { - params.input = "${baseDir}/tests/csv/3.0/fastq_umi.csv" - params.umi_read_structure = '7M1S+T' +process { + withName:'.*:FREEC_SOMATIC'{ + ext.args = { + [ + "sample":[ + inputformat: "pileup", + mateorientation: "FR" + ], + "general":[ + bedgraphoutput: "TRUE", + noisydata: "TRUE", + minexpectedgc: "0", + readcountthreshold: "1", + sex: meta.sex, + window: "10", + ], + "control":[ + inputformat: "pileup", + mateorientation: "FR" + ] + ] + } } -} -//This is apparently useless as it won't overwrite things in the modules.config -process { - withName:SNPEFF { - maxForks = 1 + withName: '.*:FILTERVARIANTTRANCHES'{ + ext.args = { "--info-key CNN_1D 
--indel-tranche 0" } + } - withName:ENSEMBLVEP { - maxForks = 1 + + withName: '.*:PARABRICKS_FQ2BAM' { + accelerator = { task.executor in ['awsbatch','google-batch','hq','k8s'] ? 1 : null } + ext.args = { [ + // Using specific read group tags for mutect compatibility + "--read-group-id-prefix ${meta.sample_lane_id}", + "--read-group-sm ${meta.patient}_${meta.sample}", + "--read-group-lb ${meta.sample}", + "--read-group-pl ${params.seq_platform}", + // Using -B 3 for tumor samples + meta.status == 1 ? "--bwa-options='-K 100000000 -Y -B 3'" : "--bwa-options='-K 100000000 -Y'", + // Use low memory mode for tests + "--low-memory", + ].join(' ').trim() } } } diff --git a/conf/test_full.config b/conf/test_full.config index b5581eaa58..b4aa7eb84a 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -1,11 +1,11 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Nextflow config file for running full-size tests -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines input files and everything required to run a full size pipeline test. Use as follows: - nextflow run nf-core/sarek -profile test_full, + nextflow run nf-core/sarek -profile test_full, --outdir ---------------------------------------------------------------------------------------- */ @@ -15,14 +15,27 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/HCC1395_WXS_somatic_full_test.csv' - // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed + // Other params + tools = 'ascat,cnvkit,controlfreec,freebayes,lofreq,manta,msisensor2,muse,mutect2,ngscheckmate,strelka,tiddit,snpeff,vep' + split_fastq = 20000000 + intervals = 's3://ngi-igenomes/test-data/sarek/S07604624_Padded_Agilent_SureSelectXT_allexons_V6_UTR.bed' + wes = true + filter_vcfs = true + normalize_vcfs = true + snv_consensus_calling = true +} - // Ignore `--input` as otherwise the parameter validation will throw an error - schema_ignore_params = 'genomes,input,modules' +process { + // Request a GPU for every parabricks process + // NOTE https://docs.nvidia.com/clara/parabricks/latest/gettingstarted.html#hardware-requirements + withName: 'PARABRICKS_.*' { + cpus = { 48 * task.attempt } + memory = { task.attempt > 1 ? '370.GB' : '186.GB' } + time = { 1.h * task.attempt } + resourceLimits = [cpus: 96, memory: 370.GB] + maxRetries = 3 + errorStrategy = { task.exitStatus in [1,2,143,137,104,134,139,255] ? 
'retry' : 'finish' } + } } diff --git a/conf/test_full_germline.config b/conf/test_full_germline.config new file mode 100644 index 0000000000..9d8146dc15 --- /dev/null +++ b/conf/test_full_germline.config @@ -0,0 +1,24 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run nf-core/sarek -profile test_full_germline, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Full test profile for germline VC' + config_profile_description = 'Full test dataset to check germline VC pipeline function' + + // Input data for full size test + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/NA12878_WGS_30x_full_test.csv' + + // Other params + tools = 'cnvkit,deepvariant,freebayes,haplotypecaller,indexcov,manta,strelka,tiddit,snpeff,vep' + split_fastq = 50000000 + filter_vcfs = true +} diff --git a/conf/test_full_germline_ncbench_agilent.config b/conf/test_full_germline_ncbench_agilent.config new file mode 100644 index 0000000000..879a20da33 --- /dev/null +++ b/conf/test_full_germline_ncbench_agilent.config @@ -0,0 +1,25 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. 
+ + Use as follows: + nextflow run nf-core/sarek -profile test_full_germline_ncbench_agilent, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile for germline VC' + config_profile_description = 'Agilent test datasets to benchmark germline VC pipeline function using the NCBench framework' + + // Input data for full size test + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/NA12878_Agilent_full_test.csv' + + // Other params + tools = 'deepvariant,freebayes,haplotypecaller,manta,strelka,snpeff,vep' + intervals = 's3://ngi-igenomes/test-data/sarek/Agilent_v7.bed' + wes = true + trim_fastq = true +} diff --git a/conf/test_mutect2.config b/conf/test_mutect2.config new file mode 100644 index 0000000000..1db2c27ceb --- /dev/null +++ b/conf/test_mutect2.config @@ -0,0 +1,6 @@ +process { + withName: '.*:MUTECT2_PAIRED'{ + //sample name from when the test data was generated + ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal" } + } +} diff --git a/docs/DEVELOPER_GUIDELINES.md b/docs/DEVELOPER_GUIDELINES.md new file mode 100644 index 0000000000..eac5205ee6 --- /dev/null +++ b/docs/DEVELOPER_GUIDELINES.md @@ -0,0 +1,1003 @@ +# nf-core/sarek Developer Guidelines + +This document provides comprehensive guidelines for contributing to the nf-core/sarek pipeline. These guidelines are designed for both human developers and AI agents. 
+ +## Table of Contents + +- [Contributing Principles](#contributing-principles) +- [Git Workflow](#git-workflow) +- [Codebase Architecture](#codebase-architecture) +- [Code Style](#code-style) +- [Channel Operations and Gotchas](#channel-operations-and-gotchas) +- [Meta Map Handling](#meta-map-handling) +- [Modules](#modules) +- [Subworkflows](#subworkflows) +- [Configuration](#configuration) +- [Testing](#testing) +- [Documentation](#documentation) +- [Metro Map Updates](#metro-map-updates) +- [PR Checklist](#pr-checklist) + +--- + +## Contributing Principles + +- **One PR, one feature** — scope each PR to a single change; keep it as minimal as possible +- **Read files before editing** — understand existing code before making changes +- Keep fixes **minimal and focused** — don't refactor surrounding code +- Don't add docstrings, comments, or type annotations to unchanged code +- Don't add error handling or validation beyond what's needed +- Don't over-engineer: no premature abstractions, no feature flags +- When unsure about scope or approach, ask rather than guess + +--- + +## Git Workflow + +- **Always branch off `origin/dev`**, never master +- Branch naming: `fix/issue-XXXX` or `feat/issue-XXXX` +- PRs target the `dev` branch +- Never force push, never amend published commits without asking +- Commit messages should be descriptive and include the issue reference + +--- + +## Codebase Architecture + +Sarek follows a hierarchical, modular architecture: + +``` +Modules (atomic processes) → Subworkflows (composed modules) → Workflow (orchestration) +``` + +**Key design principles:** + +- Separation of concerns between processing steps +- Reusable components through nf-core modules ecosystem +- Configuration-driven behavior via `ext.*` directives +- Comprehensive testing with nf-test + +### Directory Structure + +``` +sarek/ +├── main.nf # Pipeline entry point +├── nextflow.config # Main configuration +├── nextflow_schema.json # Parameter schema (JSON Schema) +├── 
modules.json # nf-core module tracking +├── modules/ +│ ├── local/ # Pipeline-specific modules +│ └── nf-core/ # Imported nf-core modules +├── subworkflows/ +│ ├── local/ # Pipeline-specific subworkflows +│ └── nf-core/ # Imported nf-core subworkflows +├── workflows/sarek/main.nf # Main workflow orchestration +├── conf/ +│ ├── base.config # Default resource allocations +│ ├── modules/ # Module-specific configurations +│ └── test/ # Test configurations +├── tests/ # nf-test test files +├── docs/ # Documentation +└── assets/ # MultiQC config, samplesheets, etc. +``` + +--- + +## Code Style + +### Harshil Alignment + +Use "Harshil alignment" for include statements - align the closing braces to improve readability: + +```groovy +// CORRECT - Harshil alignment +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../../subworkflows/local/utils_nfcore_sarek_pipeline' + +// CORRECT - With aliases +include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_INPUT } from '../../subworkflows/local/bam_convert_samtools' +include { SPRING_DECOMPRESS as SPRING_DECOMPRESS_TO_R1_FQ } from '../../modules/nf-core/spring/decompress' +include { SPRING_DECOMPRESS as SPRING_DECOMPRESS_TO_R2_FQ } from '../../modules/nf-core/spring/decompress' + +// INCORRECT - No alignment +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../../subworkflows/nf-core/utils_nfcore_pipeline' +``` + +### Harshil Alignment in Take/Emit Blocks + +Also apply alignment to `take:` and `emit:` blocks: + +```groovy +take: +cram // channel: [mandatory] [ meta, cram, crai ] +dict // channel: [optional] [ meta, dict ] +fasta // channel: [mandatory] [ fasta ] +fasta_fai // channel: [mandatory] [ fasta_fai ] +intervals // channel: [mandatory] [ interval.bed.gz, 
interval.bed.gz.tbi, num_intervals ] + + emit: +vcf_ann // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] +tab_ann +json_ann +reports // path: *.html +versions // path: versions.yml +``` + +### Channel Naming Conventions + +```groovy +// Initial process output channel +ch_output_from_<process> + +// Intermediate/terminal channels +ch_<previousprocess>_for_<nextprocess> + +// Example +ch_bam_from_markduplicates +ch_markduplicates_for_baserecalibrator +``` + +### Topic Channels + +We are migrating to **Nextflow topic channels** where possible. Topics allow processes and subworkflows to publish to a named topic without explicit channel wiring. + +When installing or updating an nf-core module, check if it publishes `versions` or `multiqc` outputs via topics. If it does, use the topic and remove the explicit `.mix()` wiring for those channels. + +```groovy +// OLD - Explicit version/report collection +versions = versions.mix(TOOL_A.out.versions) +ch_multiqc_files = ch_multiqc_files.mix(TOOL_A.out.report) + +// NEW - If the module uses topics, remove the .mix() lines above. +// The module already publishes to the topic internally. +// Collect from the topic in the top-level workflow: +// ch_versions = Channel.topic('versions') +``` + +### General Style + +- Use 4-space indentation +- Put channel operations on separate lines for readability +- Add comments for complex logic +- Use descriptive variable names + +### Strict Syntax Mode + +**When touching any code in a PR, you must update it to use strict Nextflow syntax.** This ensures gradual modernization of the codebase. + +#### Required Changes When Modifying Code + +1. **Use explicit `it` variable or named parameters in closures:** + + ```groovy + // CORRECT - Explicit named parameters + .map { meta, vcf -> [meta, vcf] } + + // CORRECT - Explicit `it` when single parameter + .map { it -> it.baseName } + + // DEPRECATED - Implicit `it` + .map { it.baseName } + ``` + +2. 
**Explicit type declarations where applicable:** + + ```groovy + // CORRECT + String prefix = "${meta.id}" + List args = [] + + // AVOID in new code + def prefix = "${meta.id}" + ``` + +3. **Use underscore prefix for unused/dropped variables:** + + The underscore prefix convention clearly indicates which variables from a closure are intentionally not used in the output. This makes code review easier and prevents confusion about whether a variable was accidentally omitted. + + ```groovy + // CORRECT - Underscore prefix shows vcf is intentionally dropped + .map { meta, _vcf, tbi -> [meta, tbi] } + + // CORRECT - Multiple dropped variables + .map { meta, _vcf, _tbi, file -> [meta, file] } + + // CORRECT - In join operations + .join(other_channel, failOnDuplicate: true, failOnMismatch: true) + .map { meta, file1, _file2 -> [meta, file1] } + + // CORRECT - When extracting from complex structures + VCF_ANNOTATE_SNPEFF.out.vcf_tbi.map { meta, vcf_, _tbi -> [meta, vcf_, []] } + + // INCORRECT - Unclear which variables are intentionally unused + .map { meta, vcf, tbi -> [meta, tbi] } + ``` + + **When to use underscore prefix:** + - Variable is received but not included in output + - Variable is needed for destructuring but value is discarded + - Makes intent clear during code review + +--- + +## Channel Operations and Gotchas + +### Join Operations - ALWAYS Use `failOnDuplicate` and `failOnMismatch` + +When joining channels, ALWAYS specify `failOnDuplicate: true, failOnMismatch: true` to catch bugs early: + +```groovy +// CORRECT - Will fail fast if there are issues +vcf_tbi = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true) + +// INCORRECT - Silent failures can cause subtle bugs +vcf_tbi = vcf.join(tbi) +``` + +Use `remainder: true` only when intentionally handling unmatched items: + +```groovy +// When some items may not have matches (intentional) +all_unmapped_bam = SAMTOOLS_VIEW_UNMAP_UNMAP.out.bam + .join(SAMTOOLS_VIEW_UNMAP_MAP.out.bam, failOnDuplicate: 
true, remainder: true) + .join(SAMTOOLS_VIEW_MAP_UNMAP.out.bam, failOnDuplicate: true, remainder: true) +``` + +### Branch Operations + +Use `branch` to split channels based on conditions: + +```groovy +vcf_out = STRELKA_SINGLE.out.vcf.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 +} + +// Access branches +vcf_out.intervals // Items where num_intervals > 1 +vcf_out.no_intervals // Items where num_intervals <= 1 +``` + +### GroupTuple - Use `groupKey` for Performance + +When using `groupTuple`, use `groupKey` with known size to avoid blocking: + +```groovy +// CORRECT - Non-blocking when size is known +vcf_to_merge = vcf_out.intervals + .map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]} + .groupTuple() + +// NOTE: Without groupKey and size, groupTuple is a blocking operation +// This can cause pipeline hangs if the expected number of items varies +``` + +### Strelka Special Case - SNV and Indel VCFs + +Strelka produces TWO VCF files (SNVs and Indels) that need special handling: + +```groovy +// Strelka somatic outputs need to be concatenated before consensus calling +ch_vcfs = vcfs.branch{ meta, vcf, tbi -> + strelka_somatic: meta.variantcaller == 'strelka' && meta.status == '1' + other: true +} + +// Concatenate the two strelka VCFs (SNPs and indels) using groupTuple(size: 2) +BCFTOOLS_CONCAT(ch_vcfs.strelka_somatic.groupTuple(size: 2)) +``` + +### Combine vs Join + +- Use `join` when combining channels by a key (meta map) +- Use `combine` when creating cartesian product (e.g., sample x intervals) + +```groovy +// Join by meta key +vcf_tbi = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true) + +// Combine all samples with all intervals (cartesian product) +cram_intervals = cram.combine(intervals) +``` + +### Controlling Flow with Channel Operations (Preferred) + +**Nextflow is a dataflow language.** Prefer channel operations over `if` statements to 
control which processes run: + +```groovy +// BEST - Use filter to control what enters a process +input_channel + .filter { meta, _file -> params.tools?.split(',')?.contains('toolname') } + .set { ch_for_tool } + +TOOL_PROCESS(ch_for_tool) + +// BEST - Use branch for multiple conditional paths +input_channel.branch { meta, file -> + tool_a: params.tools?.split(',')?.contains('tool_a') + tool_b: params.tools?.split(',')?.contains('tool_b') + other: true +}.set { ch_branched } + +TOOL_A(ch_branched.tool_a) +TOOL_B(ch_branched.tool_b) + +// AVOID - if statements for flow control (use only when channel ops aren't suitable) +if (params.run_tool) { + TOOL_PROCESS(input_channel) +} +``` + +**Benefits of channel operations:** + +- More idiomatic Nextflow - data drives execution +- Better composability and testability +- Clearer dataflow visualization +- Avoids caching issues when conditions change + +--- + +## Meta Map Handling + +### Adding Fields to Meta + +Use `meta + [key: value]` syntax: + +```groovy +// Add single field +meta = meta + [id: meta.sample] + +// Add multiple fields +meta = meta + [id: "${meta.sample}-${meta.lane}".toString(), data_type: "fastq_gz", num_lanes: num_lanes.toInteger()] + +// In map operation +.map{ meta, vcf -> [ meta + [ variantcaller:'strelka' ], vcf ] } +``` + +### Removing Fields from Meta - Use `subMap` + +Use `meta - meta.subMap('field')` to remove fields: + +```groovy +// Remove single field +.map{ meta, vcf -> [ meta - meta.subMap('num_intervals'), vcf ] } + +// Remove multiple fields +.map{ meta, vcf, tbi -> + [meta - meta.subMap('variantcaller', 'contamination', 'filename'), vcf, tbi] +} + +// Add and remove in one operation +.map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'strelka' ], vcf ] } +``` + +### Accessing Meta Fields + +```groovy +// In map closures +.map{ meta, file -> [meta.sample, file] } + +// In branch conditions +.branch{ meta, vcf -> + intervals: meta.num_intervals > 1 + no_intervals: 
meta.num_intervals <= 1 +} + +// Getting subset of meta +[meta.patient, meta.subMap('sample', 'status')] +``` + +### Common Meta Fields in Sarek + +| Field | Description | +| -------------------- | --------------------------------------------- | +| `meta.patient` | Patient identifier | +| `meta.sample` | Sample identifier | +| `meta.status` | 0 = normal, 1 = tumor | +| `meta.lane` | Sequencing lane | +| `meta.id` | Unique identifier (often `${sample}-${lane}`) | +| `meta.data_type` | Input type: `fastq_gz`, `bam`, `cram` | +| `meta.num_intervals` | Number of intervals for scatter/gather | +| `meta.variantcaller` | Name of variant caller | +| `meta.num_lanes` | Total number of lanes for sample | + +--- + +## Modules + +### DEPRECATED: The `ext.when` Clause Pattern + +> **DEPRECATED:** The `ext.when` clause pattern is deprecated and should NOT be used in new code. Existing code using this pattern should be refactored when touched in a PR. + +You may see comments in older subworkflow files like: + +```groovy +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run +``` + +**Do not follow this pattern for new code.** Instead, prefer channel operations to control dataflow: + +```groovy +// BEST - Use channel operations (filter, branch) to control dataflow +input_channel + .filter { meta, _file -> params.tools?.split(',')?.contains('toolname') } + .set { ch_for_tool } + +TOOL_PROCESS(ch_for_tool) + +// ACCEPTABLE - When channel operations aren't suitable, use explicit conditional +if (tools && tools.split(',').contains('toolname')) { + TOOL_PROCESS(input_channel) + versions = versions.mix(TOOL_PROCESS.out.versions) +} + +// DEPRECATED - Using ext.when in config +// withName: 'TOOL_PROCESS' { +// ext.when = { params.tools && params.tools.split(',').contains('toolname') } +// } +``` + +**Why channel operations are preferred:** + +- Nextflow is a dataflow language - let the data drive execution +- 
Channel operations are more composable and testable +- Avoids issues with process caching when conditions change +- Makes the pipeline logic more explicit and traceable + +### Remapping Channels for Module Input + +When a module expects a different input structure, remap in the call: + +```groovy +// Remap channel to match module/subworkflow input signature +BAM_VARIANT_CALLING_CNVKIT( + cram.map{ meta, cram, crai -> [ meta, [], cram ] }, + fasta, + fasta_fai, + intervals_bed_combined.map{it -> it ? [[id:it[0].baseName], it]: [[id:'no_intervals'], []]}, + params.cnvkit_reference ? cnvkit_reference.map{ it -> [[id:it[0].baseName], it] } : [[:],[]] +) +``` + +### Module Memory Requirements + +Some modules have specific memory requirements noted in comments: + +```groovy +// In modules/nf-core/bwa/index/main.nf: +// NOTE requires 5.37N memory where N is the size of the database + +// In modules/nf-core/bwamem2/index/main.nf: +// NOTE Requires 28N GB memory where N is the size of the reference sequence, floor of 280M +``` + +### Adding/Updating nf-core Modules + +```bash +# Install a new module +nf-core modules install <tool>/<subtool> + +# Update an existing module +nf-core modules update <tool>/<subtool> + +# List installed modules +nf-core modules list local +``` + +--- + +## Subworkflows + +### Subworkflow Naming Patterns + +| Category | Naming Pattern | Examples | +| --------------- | ----------------------- | ----------------------------------------- | +| Alignment | `fq_align_*` | `fq_align_bwamem`, `fq_align_bwamem2` | +| BAM processing | `bam_*` | `bam_markduplicates`, `bam_applybqsr` | +| Variant calling | `bam_variant_calling_*` | `bam_variant_calling_germline_all` | +| VCF processing | `vcf_*` | `vcf_annotate_all`, `vcf_concat_variants` | +| Preparation | `prepare_*` | `prepare_genome`, `prepare_intervals` | + +### Subworkflow Structure + +```groovy +// +// DESCRIPTION OF SUBWORKFLOW +// + +include { MODULE_A } from '../../../modules/nf-core/module_a' +include { MODULE_B } from 
'../../../modules/nf-core/module_b' +include { MODULE_B as MODULE_B_ALIAS } from '../../../modules/nf-core/module_b' + +workflow SUBWORKFLOW_NAME { + take: + input_channel // channel: [mandatory] [ meta, file ] + other_inputs // channel: [optional] description + + main: + versions = Channel.empty() + + // Initialize output channels + output_a = Channel.empty() + output_b = Channel.empty() + + // PREFERRED: Use channel operations to control dataflow + ch_for_module_a = input_channel + .filter { meta, _file -> meta.run_module_a } + + MODULE_A(ch_for_module_a) + versions = versions.mix(MODULE_A.out.versions) + + MODULE_B(MODULE_A.out.result) + versions = versions.mix(MODULE_B.out.versions) + + emit: + result = MODULE_B.out.result // channel: [ val(meta), file ] + versions // channel: versions.yml +} +``` + +### Scatter-Gather Pattern + +Common pattern for parallelizing over intervals: + +```groovy +// Combine samples with intervals for scatter strategy +cram_intervals = cram.combine(intervals) + // Move num_intervals to meta map for later grouping + .map{ meta, cram, crai, intervals, intervals_index, num_intervals -> + [ meta + [ num_intervals:num_intervals ], cram, crai, intervals, intervals_index ] + } + +// Run process on each interval +PROCESS(cram_intervals, fasta, fasta_fai) + +// Gather: Branch by whether intervals were used +vcf_out = PROCESS.out.vcf.branch{ meta, _vcf -> + intervals: meta.num_intervals > 1 + no_intervals: meta.num_intervals <= 1 +} + +// Merge interval results +vcf_to_merge = vcf_out.intervals + .map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]} + .groupTuple() + +MERGE_VCFS(vcf_to_merge, dict) + +// Combine merged and non-interval results, clean up meta +vcf_final = Channel.empty() + .mix(MERGE_VCFS.out.vcf, vcf_out.no_intervals) + .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'toolname' ], vcf ] } +``` + +--- + +## Configuration + +### Module Configuration Files + +Module behavior is controlled via 
`conf/modules/<tool>.config`: + +```groovy +process { + withName: 'NEWTOOL_PROCESS' { + ext.args = { params.newtool_args ?: '' } + ext.prefix = { "${meta.id}.newtool" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/newtool/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } +} +``` + +> **Note:** Older config files wrap process blocks in `if (params.tools && params.tools.split(',').contains('tool'))` guards. Do **not** use this pattern in new code — control which processes run via channel operations (`filter`, `branch`) in the workflow/subworkflow instead. When touching existing config files, remove these guards. + +### Resource Labels + +Use standard nf-core labels in `conf/base.config`: + +| Label | CPUs | Memory | Time | +| --------------------- | ---- | ------ | ---- | +| `process_single` | 1 | 6 GB | 8 h | +| `process_low` | 2 | 12 GB | 8 h | +| `process_medium` | 6 | 36 GB | 16 h | +| `process_high` | 12 | 72 GB | 32 h | +| `process_long` | - | - | 40 h | +| `process_high_memory` | - | 200 GB | - | + +### Adding New Parameters + +1. **Add to `nextflow.config`** with default value: + + ```groovy + params { + new_param = false + } + ``` + +2. **Update schema** using nf-core tools: + + ```bash + nf-core pipelines schema build + ``` + +3. **Add validation** if needed in the workflow + +--- + +## Testing + +### Test Framework + +Sarek uses **nf-test** for testing. Tests are in the `tests/` directory. 
+ +### Running Tests + +```bash +# Run all tests +nf-test test --profile debug,test,docker --verbose + +# Run specific test +nf-test test tests/variant_calling_haplotypecaller.nf.test --profile debug,test,docker + +# Run with stub mode (faster, no actual execution) +nf-test test tests/default.nf.test --profile debug,test,docker -stub + +# Update snapshots when outputs legitimately change +nf-test test tests/my_test.nf.test --profile debug,test,docker --update-snapshot +``` + +### Test Structure with UTILS.groovy + +Tests use a scenario-based pattern: + +```groovy +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + name "Test pipeline" + script "../main.nf" + tag "pipeline" + tag "pipeline_sarek" + + def test_scenario = [ + [ + name: "Test scenario name", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_single.csv", + tools: 'haplotypecaller' + ] + ], + [ + name: "Test with stub", + params: [], + stub: true + ], + [ + name: "Fails with invalid input", + params: [ + input: "${projectDir}/tests/csv/3.0/vcf_single.csv", + step: 'annotate', + vep_cache_version: 1, + build_only_index: true, + tools: 'vep' + ], + failure: true, + stdout: "Expected error message" + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.get_test(scenario)) + } +} +``` + +### Test Scenario Options + +| Option | Description | +| ------------------------------ | ----------------------------------------- | +| `name` | Test name (descriptive) | +| `params` | Map of parameters to set | +| `stub` | Run in stub mode (boolean) | +| `failure` | Expect test to fail (boolean) | +| `stdout` | Expected stdout content for failure tests | +| `gpu` | GPU test (adds gpu tag) | +| `no_conda` | Incompatible with conda | +| `include_muse_txt` | Include MuSE txt in assertions | +| `include_freebayes_unfiltered` | Include freebayes unfiltered VCFs | +| `no_vcf_md5sum` | Use VCF summary instead of md5 | + +--- + +## Documentation + +### Documentation Requirements + +**Any 
change that affects pipeline output or adds new functionality MUST include documentation updates.** + +### Documentation Files + +| File | Purpose | When to Update | +| ---------------- | ------------------------ | ----------------------------------------------- | +| `README.md` | Pipeline overview | **New tools** (add to overview/tool list) | +| `docs/usage.md` | Usage instructions | New parameters, new tools, input format changes | +| `docs/output.md` | Output file descriptions | **Any change to outputs**, new tools | +| `CHANGELOG.md` | Version history | Every PR | +| `CITATIONS.md` | Tool citations | New tools | +| `docs/images/` | Metro maps, diagrams | **New tools**, workflow changes | + +### New Tool Documentation Checklist + +When adding a new tool, you **MUST** update ALL of the following: + +1. **`README.md`** - Add tool to the pipeline overview/feature list +2. **`docs/usage.md`** - Document all new parameters and usage instructions +3. **`docs/output.md`** - Document all output files produced by the tool +4. **`docs/images/sarek_subway.*`** - Add tool to the metro map (SVG and PNG) +5. **`CITATIONS.md`** - Add tool citation +6. **`CHANGELOG.md`** - Document the addition + +### Output Changes Documentation + +Any PR that changes pipeline outputs (new files, changed file names, different content) **MUST** update: + +1. **`docs/output.md`** - Reflect the new/changed outputs +2. **`CHANGELOG.md`** - Note the change under appropriate section + +### CHANGELOG Format + +Follow [Keep a Changelog](https://keepachangelog.com/) format. 
+ +**Important conventions:** + +- Entries reference the **PR number**, not the issue number: + ``` + - [#XXX](https://github.com/nf-core/sarek/pull/XXX) - Description of change + ``` +- Use `XXX` as placeholder when no PR exists yet +- The issue number goes in the **PR description body** (for auto-close), not the changelog +- Entries within each section are in **ascending order** by PR number + +```markdown +## [Unreleased] + +### Added + +- [#PR_NUMBER](https://github.com/nf-core/sarek/pull/PR_NUMBER) - Description + +### Changed + +### Fixed + +### Removed + +### Dependencies + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| tool_name | 1.0.0 | 1.1.0 | + +### Parameters + +| Params | status | +| ------------- | ------ | +| `--new_param` | New | + +### Developer section + +#### Added + +#### Changed + +#### Fixed + +#### Removed +``` + +### Output Documentation + +In `docs/output.md`, document each tool's outputs: + +```markdown +### Tool Name + +
<details markdown="1"> +<summary>Output files</summary> + +- `path/to/output/` + - `*.extension`: Description of the file + +</details>
+ +Brief description of what this tool produces. +``` + +--- + +## Metro Map Updates + +### Metro Map Files + +Located in `docs/images/`: + +- `sarek_subway.svg` / `sarek_subway.png` - Main pipeline flow +- `sarek_indices_subway.svg` / `sarek_indices_subway.png` - Index building flow + +### When to Update + +- Adding new tools or variant callers +- Adding new preprocessing steps +- Changing the pipeline flow +- Adding new post-processing options + +### Update Process + +1. Edit the SVG file (use Inkscape or similar) +2. Export to PNG +3. Follow nf-core [design guidelines](https://nf-co.re/developers/design_guidelines) +4. After release, checkout figures from `master` to `dev`: + ```bash + git checkout upstream/master -- docs/images/sarek_subway.svg + git checkout upstream/master -- docs/images/sarek_subway.png + ``` + +--- + +## PR Checklist + +### Before Submitting + +- [ ] PR targets `dev` branch (not `master`) +- [ ] Code follows Harshil alignment style +- [ ] **Any touched code updated to strict syntax** (explicit closure params, underscore for unused vars) +- [ ] **No new `ext.when` usage** - use channel operations instead +- [ ] **Prefer channel operations** (`filter`, `branch`) over `if` statements for flow control +- [ ] Pre-commit checks pass: `pre-commit run --all-files` +- [ ] All tests pass: `nf-test test --profile debug,test,docker` +- [ ] Linting passes: `nf-core pipelines lint` +- [ ] No debug mode warnings + +### For New Tools + +**Code:** + +- [ ] Module added/imported correctly +- [ ] Configuration in `conf/modules/<tool>.config` +- [ ] Test added in `tests/` +- [ ] MultiQC config updated (`assets/multiqc_config.yml`) if tool has MultiQC module + +**Documentation (ALL required):** + +- [ ] `README.md` - Tool added to pipeline overview/feature list +- [ ] `docs/usage.md` - All parameters documented with usage instructions +- [ ] `docs/output.md` - All output files documented +- [ ] `docs/images/sarek_subway.svg` - Tool added to metro map +- [ ] 
`docs/images/sarek_subway.png` - Exported PNG of updated metro map +- [ ] `CITATIONS.md` - Tool citation added +- [ ] `CHANGELOG.md` - Addition documented + +### For New Variant Callers + +Adding a variant caller touches **6 locations** — missing any of them causes silent bugs. All of the above "New Tools" items apply, plus: + +- [ ] **`nextflow_schema.json`** - Add to the `tools` parameter regex pattern +- [ ] **Dispatcher subworkflow** - Add call and wire outputs into `vcf_all.mix(...)`: + - `subworkflows/local/bam_variant_calling_germline_all/main.nf` (germline callers) + - `subworkflows/local/bam_variant_calling_somatic_all/main.nf` (somatic pair callers) + - `subworkflows/local/bam_variant_calling_tumor_only_all/main.nf` (tumor-only callers) + - Use channel operations (`filter`, `branch`) to control execution — not `if` blocks (existing `if` blocks are legacy) +- [ ] **`subworkflows/local/post_variantcalling/main.nf`** - Add to `small_variantcallers` list (for SNV callers eligible for normalization/filtering/consensus) or `excluded_variantcallers` (for SV callers). **Forgetting this silently excludes the caller from post-processing.** +- [ ] **Individual subworkflow** - Set `variantcaller` in meta map (e.g., `meta + [variantcaller: 'toolname']`) + +### For New Parameters + +- [ ] Default value in `nextflow.config` +- [ ] Schema updated: `nf-core pipelines schema build` +- [ ] Validation added (if needed) +- [ ] Documentation in `docs/usage.md` +- [ ] CHANGELOG updated + +### For Changes Affecting Pipeline Output + +Any PR that changes output files (new files, renamed files, changed content): + +- [ ] `docs/output.md` updated to reflect changes +- [ ] `CHANGELOG.md` updated +- [ ] If significant workflow change: metro map updated (`docs/images/sarek_subway.*`) + +--- + +## Common Gotchas + +### 1. 
Forgetting `failOnDuplicate`/`failOnMismatch` on Joins + +**Problem:** Silent data loss or incorrect pairing +**Solution:** Always use `join(..., failOnDuplicate: true, failOnMismatch: true)` + +### 2. Strelka Produces Two VCFs + +**Problem:** Strelka outputs SNV and Indel VCFs separately +**Solution:** Use `groupTuple(size: 2)` then `BCFTOOLS_CONCAT` before downstream processing + +### 3. Blocking GroupTuple + +**Problem:** `groupTuple()` without size blocks pipeline +**Solution:** Use `groupKey(meta, meta.num_intervals)` when size is known + +### 4. Meta Fields Persisting + +**Problem:** Temporary meta fields (like `num_intervals`) persist in output +**Solution:** Clean up with `meta - meta.subMap('field_name')` before emit + +### 5. DeepVariant Conda + +**Problem:** DeepVariant doesn't support Conda +**Solution:** Note in module: `// FIXME Conda is not supported at the moment` + +### 6. BWA Memory Requirements + +**Problem:** Unexpected OOM errors +**Solution:** BWA requires ~5.37N memory, BWA-MEM2 requires ~28N GB where N = reference size + +### 7. Using Deprecated `ext.when` Pattern + +**Problem:** Old code uses `ext.when` in config to control module execution +**Solution:** When touching this code, refactor to use channel operations (`filter`, `branch`) to control dataflow. Nextflow is a dataflow language - let the data drive execution. Avoid both `ext.when` AND `if` statements where possible. + +### 8. Forgetting to Register a New Variant Caller + +**Problem:** New variant caller runs and produces VCFs, but is silently excluded from normalization, filtering, and consensus calling +**Solution:** Must update all 6 registration points — see [For New Variant Callers](#for-new-variant-callers) checklist. The most commonly missed is `post_variantcalling/main.nf`'s `small_variantcallers` list. + +### 9. 
Implicit Variables in Closures + +**Problem:** Using implicit `it` makes code harder to read and review +**Solution:** Always use explicit named parameters in closures: `.map { meta, vcf -> ... }` not `.map { [ it[0], it[1] ] }` + +--- + +## Quick Reference + +### Essential Commands + +```bash +# Run tests +nf-test test --profile debug,test,docker + +# Lint pipeline +nf-core pipelines lint + +# Update schema +nf-core pipelines schema build + +# Install/update module +nf-core modules install <tool>/<subtool> +nf-core modules update <tool>/<subtool> +``` + +### Key Files for Common Changes + +| Change Type | Primary Files | +| ------------- | ----------------------------------------------- | +| New parameter | `nextflow.config`, `nextflow_schema.json` | +| New tool | `conf/modules/<tool>.config`, subworkflow, test | +| Bug fix | Relevant module/subworkflow, test | +| Documentation | `docs/usage.md`, `docs/output.md` | +| Any change | `CHANGELOG.md` | + +--- + +## Getting Help + +- **Slack:** [#sarek channel](https://nfcore.slack.com/channels/sarek) +- **GitHub Issues:** [nf-core/sarek/issues](https://github.com/nf-core/sarek/issues) +- **Documentation:** [nf-co.re/sarek](https://nf-co.re/sarek) diff --git a/docs/README.md b/docs/README.md index 1c3bb24c22..9ae0a5eaf5 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,9 +2,9 @@ The nf-core/sarek documentation is split into the following pages: -* [Usage](usage.md) - * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. -* [Output](output.md) - * An overview of the different results produced by the pipeline and how to interpret them. +- [Usage](usage.md) + - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +- [Output](output.md) + - An overview of the different results produced by the pipeline and how to interpret them. 
You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re) diff --git a/docs/abstracts/2018-05-PMC.md b/docs/abstracts/2018-05-PMC.md index 6f4ab2a166..22300860f5 100644 --- a/docs/abstracts/2018-05-PMC.md +++ b/docs/abstracts/2018-05-PMC.md @@ -26,10 +26,10 @@ Max Käller 2D, 7. Clinical Genetics, Dept. of Molecular Medicine and Surgery; 8. Dept. of Medical Sciences; 9. Dept. of Cell and Molecular Biology; -A. Uppsala University; -B. Dept. of Biochemistry and Biophysics; -C. Stockholm University; -D. School of Biotechnology, Division of Gene Technology, Royal Institute of Technology + A. Uppsala University; + B. Dept. of Biochemistry and Biophysics; + C. Stockholm University; + D. School of Biotechnology, Division of Gene Technology, Royal Institute of Technology We present Sarek, a complete Open Source pipeline to resolve germline and somatic variants from WGS data: it is written in Nextflow, a domain-specific language for workflow building. Sarek is based on GATK best practices to prepare short-read data, in parallel for a tumor/normal pair sample. diff --git a/docs/abstracts/2018-06-EACR25.md b/docs/abstracts/2018-06-EACR25.md index 6a4be1c8df..afe49fcfd5 100644 --- a/docs/abstracts/2018-06-EACR25.md +++ b/docs/abstracts/2018-06-EACR25.md @@ -21,7 +21,7 @@ Monica Nistér 3 ### Introduction Whole-genome sequencing of cancer tumours is more a research tool nowadays, but going to be used in clinical settings in -the near future to facilitate precision medicine. While large institutions have built up in-house bioinformatics +the near future to facilitate precision medicine. While large institutions have built up in-house bioinformatics solutions for their own data analysis, robust and portable workflows combining multiple software have been lacking, making it difficult for individual research groups to utilise the potential of this research field. 
Here we present Sarek, a robust, easy-to-install workflow for identification of both somatic and germline mutations from paired @@ -38,7 +38,7 @@ joint realignment around indels for both the tumour and the normal data. Reads a reference in an ALT-aware settings using BWA, however, it is possible to assign other references. HaplotypeCaller and Strelka2 germline calls are collected for both the tumour and the normal sample, and Manta provides germline structural variants. The somatic variations are calculated by running MuTect2, Strelka and FreeBayes (and MuTect1 optionally). -Somatic structural variants are delivered by Manta, and ASCAT estimates ploidy, tumour heterogeneity and CNVs. The +Somatic structural variants are delivered by Manta, and ASCAT estimates ploidy, tumour heterogeneity and CNVs. The resulting variant call files are annotated by SnpEff and Ensembl-VEP. The annotated calls are further filtered and prioritised by our custom methods. During running the workflow quality control metrics are also calculated and aggregated by MultiQC. diff --git a/docs/abstracts/2018-06-NPMI.md b/docs/abstracts/2018-06-NPMI.md index fbb3d97d8c..7de2d1d533 100644 --- a/docs/abstracts/2018-06-NPMI.md +++ b/docs/abstracts/2018-06-NPMI.md @@ -26,10 +26,10 @@ Max Käller 2D, 7. Clinical Genetics, Dept. of Molecular Medicine and Surgery; 8. Dept. of Medical Sciences; 9. Dept. of Cell and Molecular Biology; -A. Uppsala University; -B. Dept. of Biochemistry and Biophysics; -C. Stockholm University; -D. School of Biotechnology, Division of Gene Technology, Royal Institute of Technology + A. Uppsala University; + B. Dept. of Biochemistry and Biophysics; + C. Stockholm University; + D. School of Biotechnology, Division of Gene Technology, Royal Institute of Technology We present Sarek, a portable Open Source pipeline to resolve germline and somatic variants from WGS data: it is written in Nextflow, a domain-specific language for workflow building. 
It processes normal samples or normal/tumor pairs (with the option to include matched relapses). diff --git a/docs/abstracts/2018-07-JOBIM.md b/docs/abstracts/2018-07-JOBIM.md index 9a6257cddb..ee32b17d22 100644 --- a/docs/abstracts/2018-07-JOBIM.md +++ b/docs/abstracts/2018-07-JOBIM.md @@ -16,7 +16,7 @@ Björn Nystedt 95A, Monica Nistér 13, Max Käller 2D - Max Käller +Max Käller 1. Barntumörbanken, Dept. of Oncology Pathology; 2. Science for Life Laboratory; @@ -27,10 +27,10 @@ Max Käller 2D 7. Clinical Genetics, Dept. of Molecular Medicine and Surgery; 8. Dept. of Medical Sciences; 9. Dept. of Cell and Molecular Biology; -A. Uppsala University; -B. Dept. of Biochemistry and Biophysics; -C. Stockholm University; -D. School of Biotechnology, Division of Gene Technology, Royal Institute of Technology + A. Uppsala University; + B. Dept. of Biochemistry and Biophysics; + C. Stockholm University; + D. School of Biotechnology, Division of Gene Technology, Royal Institute of Technology We present Sarek, a portable Open Source pipeline to resolve germline and somatic variants from WGS data: it is written in Nextflow, a domain-specific language for workflow building. It processes normal samples or normal/tumor pairs (with the option to include matched relapses). 
diff --git a/docs/images/BTB_logo.svg b/docs/images/BTB_logo.svg index 099f1101f6..fd35640602 100644 --- a/docs/images/BTB_logo.svg +++ b/docs/images/BTB_logo.svg @@ -181,4 +181,4 @@ d="m 249.627,285.3818 c 0,-2.5 -1.717,-4.271 -4.126,-4.271 -2.412,0 -4.13,1.771 -4.13,4.271 0,2.496 1.718,4.277 4.13,4.277 2.409,0 4.126,-1.781 4.126,-4.277" style="fill:#33b540;fill-opacity:1;fill-rule:nonzero;stroke:none" id="path142" - inkscape:connector-curvature="0" /> \ No newline at end of file + inkscape:connector-curvature="0" /> diff --git a/docs/images/DNGC_logo.png b/docs/images/DNGC_logo.png new file mode 100644 index 0000000000..51106970ff Binary files /dev/null and b/docs/images/DNGC_logo.png differ diff --git a/docs/images/DNGC_logo.svg b/docs/images/DNGC_logo.svg new file mode 100644 index 0000000000..1ad40cd4ba --- /dev/null +++ b/docs/images/DNGC_logo.svg @@ -0,0 +1,163 @@ + + + +image/svg+xml diff --git a/docs/images/GHGA_logo.png b/docs/images/GHGA_logo.png new file mode 100644 index 0000000000..551973b788 Binary files /dev/null and b/docs/images/GHGA_logo.png differ diff --git a/docs/images/GHGA_logo.svg b/docs/images/GHGA_logo.svg new file mode 100644 index 0000000000..a831ac8f85 --- /dev/null +++ b/docs/images/GHGA_logo.svg @@ -0,0 +1 @@ + diff --git a/docs/images/NBIS_logo.svg b/docs/images/NBIS_logo.svg index 337e22cf68..6eb3d9f2b2 100644 --- a/docs/images/NBIS_logo.svg +++ b/docs/images/NBIS_logo.svg @@ -284,4 +284,4 @@ d="m 210.074,0.165 h -1.605 l -6.61,10.067 h -0.066 c 0.086,-1.18 0.133,-2.266 0.133,-3.25 V 0.165 h -1.301 v 11.996 h 1.59 l 6.594,-10.027 h 0.066 c -0.012,0.149 -0.035,0.621 -0.074,1.422 -0.039,0.801 -0.055,1.375 -0.043,1.719 v 6.886 h 1.316 V 0.165" style="fill:#85be42;fill-opacity:1;fill-rule:nonzero;stroke:none" id="path128" - inkscape:connector-curvature="0" /> \ No newline at end of file + inkscape:connector-curvature="0" /> diff --git a/docs/images/NGI_logo.svg b/docs/images/NGI_logo.svg index aef40fd811..0a880c5738 100644 --- 
a/docs/images/NGI_logo.svg +++ b/docs/images/NGI_logo.svg @@ -330,4 +330,4 @@ d="M 76.914,107 H 97.133 V 98.797 H 76.914 Z" style="fill:#2e86ef;fill-opacity:1;fill-rule:evenodd;stroke:none" id="path170" - inkscape:connector-curvature="0" /> \ No newline at end of file + inkscape:connector-curvature="0" /> diff --git a/docs/images/QBiC_logo.svg b/docs/images/QBiC_logo.svg index 6bc6bd6289..bc67bbb8c3 100644 --- a/docs/images/QBiC_logo.svg +++ b/docs/images/QBiC_logo.svg @@ -100,4 +100,4 @@ inkscape:connector-curvature="0" style="fill:#ffffff" /> - \ No newline at end of file + diff --git a/docs/images/SciLifeLab_logo.svg b/docs/images/SciLifeLab_logo.svg index 3602a3b855..2b21b08612 100644 --- a/docs/images/SciLifeLab_logo.svg +++ b/docs/images/SciLifeLab_logo.svg @@ -140,4 +140,4 @@ d="m 0,0 c -2.646,1.376 -5.669,1.639 -8.513,0.741 -2.843,-0.898 -5.167,-2.85 -6.543,-5.496 -1.376,-2.645 -1.638,-5.668 -0.74,-8.512 0.898,-2.844 2.85,-5.168 5.495,-6.543 1.614,-0.839 3.368,-1.265 5.138,-1.265 1.13,0 2.266,0.174 3.375,0.524 2.843,0.898 5.167,2.85 6.543,5.495 1.375,2.646 1.639,5.669 0.74,8.513 C 4.597,-3.699 2.646,-1.375 0,0" style="fill:#91d342;fill-opacity:1;fill-rule:nonzero;stroke:none" id="path74" - inkscape:connector-curvature="0" /> \ No newline at end of file + inkscape:connector-curvature="0" /> diff --git a/docs/images/mqc_fastqc_adapter.png b/docs/images/mqc_fastqc_adapter.png deleted file mode 100755 index 361d0e47ac..0000000000 Binary files a/docs/images/mqc_fastqc_adapter.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_counts.png b/docs/images/mqc_fastqc_counts.png deleted file mode 100755 index cb39ebb80a..0000000000 Binary files a/docs/images/mqc_fastqc_counts.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_quality.png b/docs/images/mqc_fastqc_quality.png deleted file mode 100755 index a4b89bf56a..0000000000 Binary files a/docs/images/mqc_fastqc_quality.png and /dev/null differ diff --git a/docs/images/nf-core-sarek_logo_light.png 
b/docs/images/nf-core-sarek_logo_light.png index 208d16951e..61aa1c81d5 100644 Binary files a/docs/images/nf-core-sarek_logo_light.png and b/docs/images/nf-core-sarek_logo_light.png differ diff --git a/docs/images/nf-core-sarek_logo_light.svg b/docs/images/nf-core-sarek_logo_light.svg index f0cffd32b4..7a3bb9bbf6 100644 --- a/docs/images/nf-core-sarek_logo_light.svg +++ b/docs/images/nf-core-sarek_logo_light.svg @@ -2,160 +2,117 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + d="m 105.17203,-43.255454 -8.129199,8.160957 h 23.064239 v -8.160957 z" /> diff --git a/docs/images/nf-core_logo.svg b/docs/images/nf-core_logo.svg index 2998cddb8e..9ab9b9727b 100644 --- a/docs/images/nf-core_logo.svg +++ b/docs/images/nf-core_logo.svg @@ -214,4 +214,4 @@ d="m 208.22356,-176.97293 -26.36698,26.47 h 74.80865 v -26.47 z" id="path4330" style="fill:url(#linearGradient4332)" - inkscape:connector-curvature="0" /> \ No newline at end of file + inkscape:connector-curvature="0" /> diff --git a/docs/images/sarek_icon.png b/docs/images/sarek_icon.png new file mode 100644 index 0000000000..cb30119a69 Binary files /dev/null and b/docs/images/sarek_icon.png differ diff --git a/docs/images/sarek_indices_subway.png b/docs/images/sarek_indices_subway.png new file mode 100644 index 0000000000..08073107f7 Binary files /dev/null and b/docs/images/sarek_indices_subway.png differ diff --git a/docs/images/sarek_indices_subway.svg b/docs/images/sarek_indices_subway.svg new file mode 100644 index 0000000000..2635456b25 --- /dev/null +++ b/docs/images/sarek_indices_subway.svg @@ -0,0 +1,5061 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + dbsnp + panel of normals + known indels + germline resource + + + vcf + + + + + + + + + + vcf + + + + + vcf + + + + + + + + + + tabix index + + + tbi + + + + + tbi + + + + + tbi + + + + + + vcf + + + + + vcf + + + + + + + + + + + + + tbi + + + + fasta dictionnary + + + gatk + + + + + + + + + + + + + + + fasta + + + + bed + + + + + + + + samtools + cnvkit + fasta index + intervals + + + convert + + build indices + + + + 
bwa + + + build indices + + + bwamem2 + + + + hashtable + + + dragmap + + + + microsatellites + + + msisensorpro + msisensor2, + + + + + + + + + + + + + + + + bed + + + + + fai + + + + + cnn + + Adapted from: Fellows Yates, James A., et al. PeerJ 9 (2021). + + + + + dict + + + + + + msi + + + + + build indices + + + + BBSplit + + + + bbsplit + + + + + + + + + + + + + + + + bwa + + + + + + + + + + + + + + + + bwa + + + + + + + + + + + + + + + + ht + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Index preparation + + + diff --git a/docs/images/sarek_subway.png b/docs/images/sarek_subway.png new file mode 100644 index 0000000000..a743478a59 Binary files /dev/null and b/docs/images/sarek_subway.png differ diff --git a/docs/images/sarek_subway.svg b/docs/images/sarek_subway.svg new file mode 100644 index 0000000000..ee526e85d5 --- /dev/null +++ b/docs/images/sarek_subway.svg @@ -0,0 +1,3498 @@ + + + +image/svg+xmlindexcovindexcovUMImappingvcfvcfvcfvcfvcftxtvcfvcf ...fastqcfastpfasfastqensemblvepsnpeffmosdepth, samtoolsbcftools, vcftoolsmosdepth, samtoolsngscheckmatevarlociraptormultiqcconvertbam/crambam/crambam/cramvcfmarkduplicatesbam/cramvariant callingpre-processingannotationvariant callingTumor-normal pair variant callingAdapted from: Fellows Yates, James A., et al. 
PeerJ 9 (2021).Core workflowGermline variant callingTumor only variant callingBBsplitprepare recalibrationapplybqsrfilter, normalize,consensusbcftools annotatemantatiddittidditmutect2ascatmsisensorprocontrolfreeccnvkitvcfvcfvcfmusemantalofreqtnscopeconvertExample analysis pathwaysubamspringmpileupmpileuphaplotyperdnascopedeepvariantfreebayeshaplotypecallercracramcramfreebayesOptionalMandatoryOptionally/Only Sentieon acceleratedOptionally Parabricks acceleratedSNPs & IndelsSV & CNVMSIstrelkamsisensor2 diff --git a/docs/images/sarek_workflow.png b/docs/images/sarek_workflow.png index 64eea65ae0..ad54d1f456 100644 Binary files a/docs/images/sarek_workflow.png and b/docs/images/sarek_workflow.png differ diff --git a/docs/images/sarek_workflow.svg b/docs/images/sarek_workflow.svg index 6fcf577a58..65d65d14d7 100644 --- a/docs/images/sarek_workflow.svg +++ b/docs/images/sarek_workflow.svg @@ -2,29 +2,44 @@ image/svg+xml + + + + + + Sarek  SomaticSarek Sarek   ExomeExomeSarek  SomaticSarek Sarek   ExomeExome + + + + + + + + + + + + fastqfastqfastqbambambamvcfvcffastq|spring| ubamfastq|spring| ubamcramcramcramvcfvcfvcfBased on GATK Best PracticesPreprocessing + ry="5.2434611" /> + + +Based on GATK4 Best Practices,optionally accelerated with Sentieon or ParabricksPreprocessing + + + + + + snpEff, VEPsnpEff, VEPsnpEff, VEPbcftools annotate, snpeff, vep + + +Annotation + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:12.2757px;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;font-variation-settings:'wght' 700">Annotation + + + + + + Reports + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:12.2757px;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro, 
Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal">Reports + + + + + + + + + Germline• GATK HaplotypeCaller FreeBayes, mpileup, Strelka2• Manta, TIDDITVariant CallingSomatic• GATK Mutect2 FreeBayes, Strelka2• Manta• ASCAT, Control-FREEC• MSIsensor + + +Variant Calling + + +Somaticfreebayes, mutect2, strelka, lofreq, muse• manta, tiddit• ascat, cnvkit,controlfreec• msisensor2, msisensorpro + + +Germline +• deepvariant, freebayes + GATK haplotypecaller, + Sentieon haplotyper + mpileup, strelka +• indexcov,manta, tiddit +• cnvkit2.7 + transform="scale(1,-1)" /> + + - \ No newline at end of file + type="text/css" /> diff --git a/docs/output.md b/docs/output.md index 55b2965749..95c71913be 100644 --- a/docs/output.md +++ b/docs/output.md @@ -6,616 +6,1252 @@ This document describes the output produced by the pipeline. Most of the plots a The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. -The directories listed below will be created in the results directory after the pipeline has finished. -All paths are relative to the top-level results directory. 
- ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: +- [Directory Structure](#directory-structure) - [Preprocessing](#preprocessing) - - [Map to Reference](#map-to-reference) - - [BWA](#bwa) - - [BWA-mem2](#bwa-mem2) - - [Mark Duplicates](#mark-duplicates) - - [GATK MarkDuplicates](#gatk-markduplicates) - - [Base (Quality Score) Recalibration](#base-quality-score-recalibration) - - [GATK BaseRecalibrator](#gatk-baserecalibrator) - - [GATK ApplyBQSR](#gatk-applybqsr) - - [TSV files](#tsv-files) - - [TSV files with `--skip_markduplicates`](#tsv-files-with---skip_markduplicates) - - [TSV files with `--sentieon`](#tsv-files-with---sentieon) + - [Preparation of input files (FastQ or (u)BAM)](#preparation-of-input-files-fastq-or-ubam) + - [Clip and filter read length](#clip-and-filter-read-length) + - [Trim adapters](#trim-adapters) + - [Split FastQ files](#split-fastq-files) + - [UMI consensus](#umi-consensus) + - [BBSplit contamination removal](#bbsplit-contamination-removal) + - [Map to Reference](#map-to-reference) + - [BWA](#bwa) + - [BWA-mem2](#bwa-mem2) + - [DragMap](#dragmap) + - [Sentieon BWA mem](#sentieon-bwa-mem) + - [Mark Duplicates](#mark-duplicates) + - [GATK MarkDuplicates (Spark)](#gatk-markduplicates-spark) + - [Sentieon LocusCollector and Dedup](#sentieon-locuscollector-and-dedup) + - [Base Quality Score Recalibration](#base-quality-score-recalibration) + - [GATK BaseRecalibrator (Spark)](#gatk-baserecalibrator-spark) + - [GATK ApplyBQSR (Spark)](#gatk-applybqsr-spark) + - [Parabricks FQ2BAM](#parabricks-fq2bam) + - [CSV files](#csv-files) - [Variant Calling](#variant-calling) - - [SNVs and small indels](#snvs-and-small-indels) - - [FreeBayes](#freebayes) - - [GATK HaplotypeCaller](#gatk-haplotypecaller) - - [GATK GenotypeGVCFs](#gatk-genotypegvcfs) - - [GATK Mutect2](#gatk-mutect2) - - [samtools mpileup](#samtools-mpileup) - - [Strelka2](#strelka2) - - [Sentieon 
DNAseq](#sentieon-dnaseq) - - [Sentieon DNAscope](#sentieon-dnascope) - - [Sentieon TNscope](#sentieon-tnscope) - - [Structural Variants](#structural-variants) - - [Manta](#manta) - - [TIDDIT](#tiddit) - - [Sentieon DNAscope SV](#sentieon-dnascope-sv) - - [Sample heterogeneity, ploidy and CNVs](#sample-heterogeneity-ploidy-and-cnvs) - - [ConvertAlleleCounts](#convertallelecounts) - - [ASCAT](#ascat) - - [Control-FREEC](#control-freec) - - [MSI status](#msi-status) - - [MSIsensor](#msisensor) + - [SNVs and small indels](#snvs-and-small-indels) + - [bcftools](#bcftools) + - [DeepVariant](#deepvariant) + - [FreeBayes](#freebayes) + - [GATK HaplotypeCaller](#gatk-haplotypecaller) + - [GATK Germline Single Sample Variant Calling](#gatk-germline-single-sample-variant-calling) + - [GATK Joint Germline Variant Calling](#gatk-joint-germline-variant-calling) + - [GATK Mutect2](#gatk-mutect2) + - [Lofreq](#lofreq) + - [MuSE](#muse) + - [Sentieon DNAscope](#sentieon-dnascope) + - [Sentieon DNAscope joint germline variant calling](#sentieon-dnascope-joint-germline-variant-calling) + - [Sentieon Haplotyper](#sentieon-haplotyper) + - [Sentieon Haplotyper joint germline variant calling](#sentieon-haplotyper-joint-germline-variant-calling) + - [Sentieon TNscope](#sentieon-tnscope) + - [Strelka](#strelka) + - [Structural Variants](#structural-variants) + - [indexcov](#indexcov) + - [Manta](#manta) + - [TIDDIT](#tiddit) + - [Sample heterogeneity, ploidy and CNVs](#sample-heterogeneity-ploidy-and-cnvs) + - [ASCAT](#ascat) + - [CNVKit](#cnvkit) + - [Control-FREEC](#control-freec) + - [Microsatellite instability (MSI)](#microsatellite-instability-msi) + - [MSIsensor2](#msisensor2) + - [MSIsensorPro](#msisensorpro) +- [Post variant calling](#post-variant-calling) + - [Varlociraptor](#varlociraptor) + - [Filtering](#filtering) + - [Normalization](#normalization) + - [Consensus calling](#consensus-calling) + - [Concatenation](#concatenation) - [Variant annotation](#variant-annotation) - - 
[snpEff](#snpeff) - - [VEP](#vep) -- [QC and reporting](#qc-and-reporting) - - [QC](#qc) - - [FastQC](#fastqc) - - [bamQC](#bamqc) - - [GATK MarkDuplicates reports](#gatk-markduplicates-reports) - - [samtools stats](#samtools-stats) - - [bcftools stats](#bcftools-stats) - - [VCFtools](#vcftools) - - [snpEff reports](#snpeff-reports) - - [VEP reports](#vep-reports) - - [Reporting](#reporting) - - [MultiQC](#multiqc) - - [Pipeline information](#pipeline-information) + - [snpEff](#snpeff) + - [VEP](#vep) + - [BCFtools annotate](#bcftools-annotate) +- [Quality control and reporting](#quality-control-and-reporting) + - [Quality control](#quality-control) + - [FastQC](#fastqc) + - [FastP](#fastp) + - [Mosdepth](#mosdepth) + - [NGSCheckMate](#ngscheckmate) + - [GATK MarkDuplicates reports](#gatk-markduplicates-reports) + - [Sentieon Dedup reports](#sentieon-dedup-reports) + - [samtools stats](#samtools-stats) + - [bcftools stats](#bcftools-stats) + - [VCFtools](#vcftools) + - [snpEff reports](#snpeff-reports) + - [VEP reports](#vep-reports) + - [Reporting](#reporting) + - [MultiQC](#multiqc) + - [Pipeline information](#pipeline-information) +- [Reference files](#reference-files) + +## Directory Structure + +The default directory structure is as follows + +```text +{outdir} +├── csv +├── multiqc +├── pipeline_info +├── preprocessing +│ ├── markduplicates +│ └── +│ ├── recal_table +│ └── +│ └── recalibrated +│ └── +├── reference +└── reports + ├── + └── +work/ +.nextflow.log +``` ## Preprocessing -`Sarek` pre-processes raw `FASTQ` files or `unmapped BAM` files, based on [GATK best practices](https://gatk.broadinstitute.org/hc/en-us/sections/360007226651-Best-Practices-Workflows). +Sarek pre-processes raw FastQ files or unmapped BAM files, based on [GATK best practices](https://gatk.broadinstitute.org/hc/en-us/sections/360007226651-Best-Practices-Workflows). 
+ +### Preparation of input files (FastQ or (u)BAM) + +[FastP](https://github.com/OpenGene/fastp) is a tool designed to provide all-in-one preprocessing for FastQ files and as such is used for trimming and splitting. By default, these files are not published. However, if publishing is enabled, please be aware that these files are only published once, meaning if trimming and splitting is enabled, then the resulting files will be sharded FastQ files with trimmed reads. If only one of them is enabled then the files contain either trimmed or split reads, respectively. + +#### Clip and filter read length + +[FastP](https://github.com/OpenGene/fastp) enables efficient clipping of reads from either the 5' end (`--clip_r1`, `--clip_r2`) or the 3' end (`--three_prime_clip_r1`, `--three_prime_clip_r2`). Additionally, FastP allows the filtering of reads based on insert size by specifying a minimum required length with the `--length_required` parameter (default: 15bp). It is recommended to optimize these parameters according to the specific characteristics of your data. + +#### Trim adapters + +[FastP](https://github.com/OpenGene/fastp) supports global trimming, which means it trims all reads in the front or the tail. This function is useful since sometimes you want to drop some cycles of a sequencing run. In the current implementation in Sarek +`--detect_adapter_for_pe` is set by default which enables auto-detection of adapter sequences. For more information on how to fine-tune adapter trimming, take a look into the parameter docs. + +The resulting files are intermediate and by default not kept in the final files delivered to users. Set `--save_trimmed` to enable publishing of the files in: + +
+Output files for all samples + +**Output directory: `{outdir}/preprocessing/fastp/`** + +- `__{1,2}.fastp.fastq.gz>` + - Bgzipped FastQ file + +
+ +#### Split FastQ files + +[FastP](https://github.com/OpenGene/fastp) supports splitting of one FastQ file into multiple files, allowing parallel alignment of the sharded FastQ files. To enable splitting, the number of reads per output file can be specified. For more information, take a look at the parameter `--split_fastq` in the parameter docs. + +These files are intermediate and, by default, are neither published to the output folder nor kept in the final files delivered to users. Set `--save_split` to enable publishing of these files to: + +
+Output files for all samples + +**Output directory: `{outdir}/preprocessing/fastp//`** + +- `` + - Bgzipped FastQ file + +
+ +#### UMI consensus + +Sarek can create consensus reads when Unique Molecular Identifiers (UMIs) exist, using [fgbio](http://fulcrumgenomics.github.io/fgbio/tools/latest/) tools. Please note that if your UMIs are part of additional index FastQ files, then you can use [nf-core/fastquorum](https://nf-co.re/fastquorum) to process them. + +These files are intermediate and, by default, are neither published to the output folder nor kept in the final files delivered to users. Set `--save_split` to enable publishing of these files to: + +
+Output files for all samples + +**Output directory: `{outdir}/preprocessing/umi//`** + +- `` + +**Output directory: `{outdir}/reports/umi/`** + +- `` + +
+ +#### BBSplit contamination removal + +[BBSplit](http://seqanswers.com/forums/showthread.php?t=41288) is a tool that bins reads by mapping to multiple references simultaneously, using BBMap. The reads go to the bin of the reference they map to best. There are also disambiguation options, such that reads that map to multiple references can be binned with all of them, none of them, one of them, or put in a special "ambiguous" file for each of them. + +This functionality would be especially useful, for example, if you have [mouse PDX](https://en.wikipedia.org/wiki/Patient_derived_xenograft) samples that contain a mixture of human and mouse genomic DNA/RNA and you would like to filter out any mouse-derived reads. + +The BBSplit index will have to be built at least once with this pipeline by providing [`--bbsplit_fasta_list`](https://nf-co.re/sarek/parameters#bbsplit_fasta_list), which has to be a comma-separated file containing 2 columns: short name and full path to reference genome(s): + +```bash +mm10,/path/to/mm10.fa +ecoli,/path/to/ecoli.fa +sarscov2,/path/to/sarscov2.fa +``` + +You can save the index by using the [`--save_reference`](https://nf-co.re/sarek/parameters#save_reference) parameter and then provide it via [`--bbsplit_index`](https://nf-co.re/sarek/parameters#bbsplit_index) for future runs. To enable the tool, add `--tools bbsplit` to the run parameters. As described in the `Output files` dropdown box below, the FastQ files relative to the main reference genome will always be called `*primary*.fastq.gz`. + +By default, the following parameters are used for BBSplit: `ambiguous2=best maxindel=150000`. To overwrite these parameters, use a custom config, as described [here](https://nf-co.re/docs/usage/getting_started/configuration#customising-tool-arguments). + +
+Output files + +- `preprocessing/bbsplit/` + - `*.fastq.gz`: If `--save_bbsplit_reads` is specified FastQ files split by reference will be saved to the results directory. Reads from the main reference genome will be named "_primary_.fastq.gz". Reads from contaminating genomes will be named "__.fastq.gz" where `` is the first column in `--bbsplit_fasta_list` that needs to be provided to initially build the index. + - `*.txt`: File containing statistics on how many reads were assigned to each reference. + +
### Map to Reference #### BWA -[BWA](https://github.com/lh3/bwa) is a software package for mapping low-divergent sequences against a large reference genome. - -Such files are intermediate and not kept in the final files delivered to users. +[BWA](https://github.com/lh3/bwa) is a software package for mapping low-divergent sequences against a large reference genome. The aligned reads are then coordinate-sorted (or name-sorted if [`GATK MarkDuplicatesSpark`](https://gatk.broadinstitute.org/hc/en-us/articles/5358833264411-MarkDuplicatesSpark) is used for duplicate marking) with [samtools](https://www.htslib.org/doc/samtools.html). #### BWA-mem2 -[BWA-mem2](https://github.com/bwa-mem2/bwa-mem2) is a software package for mapping low-divergent sequences against a large reference genome. +[BWA-mem2](https://github.com/bwa-mem2/bwa-mem2) is a software package for mapping low-divergent sequences against a large reference genome. The aligned reads are then coordinate-sorted (or name-sorted if [`GATK MarkDuplicatesSpark`](https://gatk.broadinstitute.org/hc/en-us/articles/5358833264411-MarkDuplicatesSpark) is used for duplicate marking) with [samtools](https://www.htslib.org/doc/samtools.html). -Such files are intermediate and not kept in the final files delivered to users. +#### DragMap -### Mark Duplicates +[DragMap](https://github.com/Illumina/dragmap) is an open-source software implementation of the DRAGEN mapper, which the Illumina team created so that we would have an open-source way to produce the same results as their proprietary DRAGEN hardware. The aligned reads are then coordinate-sorted (or name-sorted if [`GATK MarkDuplicatesSpark`](https://gatk.broadinstitute.org/hc/en-us/articles/5358833264411-MarkDuplicatesSpark) is used for duplicate marking) with [samtools](https://www.htslib.org/doc/samtools.html). -#### GATK MarkDuplicates +These files are intermediate and, by default, are neither published to the output folder nor kept in the final files delivered to users. 
Set `--save_mapped` to enable publishing; additionally, add the flag `--save_output_as_bam` to publish in BAM format. -By default, `Sarek` will use [GATK MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/360042477492-MarkDuplicates-Picard), which locates and tags duplicate reads in a `BAM` or `SAM` file, where duplicate reads are defined as originating from a single fragment of DNA. +#### Sentieon BWA mem -Specify `--use_gatk_spark` to use [`GATK MarkDuplicatesSpark`](https://gatk.broadinstitute.org/hc/en-us/articles/360042912511-MarkDuplicatesSpark) instead, `Spark` implementation of `GATK MarkDuplicates`. +Sentieon [bwa mem](https://support.sentieon.com/manual/usages/general/#bwa-mem-syntax) is a subroutine for mapping low-divergent sequences against a large reference genome. It is part of the proprietary software package [DNAseq](https://www.sentieon.com/detailed-description-of-pipelines/#dnaseq) from [Sentieon](https://www.sentieon.com/). -This directory is the location for the `BAM` files delivered to users. -Besides the `duplicates-marked BAM` files, the recalibration tables (`*.recal.table`) are also stored, and can be used to create `recalibrated BAM` files. +The aligned reads are coordinate-sorted with Sentieon. -For all samples: +
+Output files for all mappers and samples + +The alignment files (BAM or CRAM) produced by the chosen aligner are not published by default. CRAM output files will not be saved in the output-folder (`outdir`), unless the flag `--save_mapped` is used. BAM output can be selected by setting the flag `--save_output_as_bam`. + +**Output directory: `{outdir}/preprocessing/mapped//`** + +- if `--save_mapped`: `.sorted.cram` and `.sorted.cram.crai` + - CRAM file and index + +- if `--save_mapped --save_output_as_bam`: `.sorted.bam` and `.sorted.bam.bai` + - BAM file and index +
-**Output directory: `results/Preprocessing/[SAMPLE]/DuplicatesMarked`** +### Mark Duplicates -- `[SAMPLE].md.bam` and `[SAMPLE].md.bai` - - `BAM` file and index +During duplicate marking, read pairs that are likely to have originated from duplicates of the same original DNA fragments through some artificial processes are identified. These are considered to be non-independent observations, so all but a single read pair within each set of duplicates are marked, causing the marked pairs to be ignored by default during the variant discovery process. For further reading and documentation see the [data pre-processing for variant discovery from the GATK best practices](https://gatk.broadinstitute.org/hc/en-us/articles/360035535912-Data-pre-processing-for-variant-discovery). -### Base (Quality Score) Recalibration +#### GATK MarkDuplicates (Spark) -#### GATK BaseRecalibrator +By default, Sarek will use [GATK MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/5358880192027-MarkDuplicates-Picard-). -[GATK BaseRecalibrator](https://gatk.broadinstitute.org/hc/en-us/articles/360042477672-BaseRecalibrator) generates a recalibration table based on various co-variates. +To use the corresponding spark implementation [`GATK MarkDuplicatesSpark`](https://gatk.broadinstitute.org/hc/en-us/articles/5358833264411-MarkDuplicatesSpark), please specify `--use_gatk_spark markduplicates`. The resulting files are converted to CRAM with either [samtools](https://www.htslib.org/doc/samtools.html), when GATK MarkDuplicates is used, or, implicitly, by GATK MarkDuplicatesSpark. -For all samples: +The resulting CRAM files are delivered to the users. -**Output directory: `results/Preprocessing/[SAMPLE]/DuplicatesMarked`** +
+Output files for all samples -- `[SAMPLE].recal.table` - - Recalibration table associated to the `duplicates-marked BAM` file. +**Output directory: `{outdir}/preprocessing/markduplicates//`** -#### GATK ApplyBQSR +- `.md.cram` and `.md.cram.crai` + - CRAM file and index +- if `--save_output_as_bam`: + - `.md.bam` and `.md.bam.bai` -[GATK ApplyBQSR](https://gatk.broadinstitute.org/hc/en-us/articles/360042476852-ApplyBQSR) recalibrates the base qualities of the input reads based on the recalibration table produced by the [GATK BaseRecalibrator](#gatk-baserecalibrator) tool. +
-This directory is the location for the final `recalibrated BAM` files. -`Recalibrated BAM` files are usually 2-3 times larger than the `duplicates-marked BAM` files. -To re-generate `recalibrated BAM` file you have to apply the recalibration table delivered to the `DuplicatesMarked\` folder either using `Sarek` ( [`--step recalibrate`](usage.md#step-recalibrate) ) , or doing this recalibration yourself. +### Sentieon LocusCollector and Dedup -For all samples: +The subroutines LocusCollector and Dedup are part of Sentieon DNAseq packages with speedup versions of the standard GATK tools, and together those two subroutines correspond to GATK's MarkDuplicates. -**Output directory: `results/Preprocessing/[SAMPLE]/Recalibrated`** +The subroutine [LocusCollector](https://support.sentieon.com/manual/usages/general/#driver-algorithm-syntax) collects read information that will be used for removing or tagging duplicate reads; its output is the score file indicating which reads are likely duplicates. -- `[SAMPLE].recal.bam` and `[SAMPLE].recal.bam.bai` - - `BAM` file and index +The subroutine [Dedup](https://support.sentieon.com/manual/usages/general/#dedup-algorithm) marks or removes duplicate reads based on the score file supplied by LocusCollector, and produces a BAM or CRAM file. -For further reading and documentation see the [data pre-processing for variant discovery from the GATK best practices](https://gatk.broadinstitute.org/hc/en-us/articles/360035535912-Data-pre-processing-for-variant-discovery). +
+Output files for all samples + +**Output directory: `{outdir}/preprocessing/sentieon_dedup//`** + +- `.dedup.cram` and `.dedup.cram.crai` + - CRAM file and index +- if `--save_output_as_bam`: + - `.dedup.bam` and `.dedup.bam.bai` + +
-### TSV files +### Base Quality Score Recalibration -The `TSV` files are auto-generated and can be used by `Sarek` for further processing and/or variant calling. +During Base Quality Score Recalibration, systematic errors in the base quality scores are corrected by applying machine learning to detect and correct for them. This is important for evaluating the correct call of a variant during the variant discovery process. However, this is not needed for all combinations of tools in Sarek. Notably, this should be turned off when having UMI tagged reads or using DragMap (see [here](https://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode)) as mapper. -For further reading and documentation see the [`--input`](usage.md#--input) section in the usage documentation. +For further reading and documentation see the [technical documentation by GATK](https://gatk.broadinstitute.org/hc/en-us/articles/360035890531-Base-Quality-Score-Recalibration-BQSR-). -For all samples: +#### GATK BaseRecalibrator (Spark) -**Output directory: `results/Preprocessing/TSV`** +[GATK BaseRecalibrator](https://gatk.broadinstitute.org/hc/en-us/articles/360042477672-BaseRecalibrator) generates a recalibration table based on various co-variates. + +To use the corresponding spark implementation [GATK BaseRecalibratorSpark](https://gatk.broadinstitute.org/hc/en-us/articles/5358896138011-BaseRecalibrator), please specify `--use_gatk_spark baserecalibrator`. -- `duplicates_marked_no_table.tsv`, `duplicates_marked.tsv` and `recalibrated.tsv` - - `TSV` files to start `Sarek` from `prepare_recalibration`, `recalibrate` or `variantcalling` steps. -- `duplicates_marked_no_table_[SAMPLE].tsv`, `duplicates_marked_[SAMPLE].tsv` and `recalibrated_[SAMPLE].tsv` - - `TSV` files to start `Sarek` from `prepare_recalibration`, `recalibrate` or `variantcalling` steps for a specific sample. +
+Output files for all samples -### TSV files with `--skip_markduplicates` +**Output directory: `{outdir}/preprocessing/recal_table//`** -> **WARNING** Only with [`--skip_markduplicates`](usage.md#--skip_markduplicates) +- `.recal.table` + - Recalibration table associated to the duplicates-marked CRAM file. + +
-For all samples: +#### GATK ApplyBQSR (Spark) -**Output directory: `results/Preprocessing/TSV`** +[GATK ApplyBQSR](https://gatk.broadinstitute.org/hc/en-us/articles/5358826654875-ApplyBQSR) recalibrates the base qualities of the input reads based on the recalibration table produced by the [GATK BaseRecalibrator](#gatk-baserecalibrator) tool. -- `mapped.tsv`, `mapped_no_duplicates_marked.tsv` and `recalibrated.tsv` - - `TSV` files to start `Sarek` from `prepare_recalibration`, `recalibrate` or `variantcalling` steps. -- `mapped_[SAMPLE].tsv`, `mapped_no_duplicates_marked_[SAMPLE].tsv` and `recalibrated_[SAMPLE].tsv` - - `TSV` files to start `Sarek` from `prepare_recalibration`, `recalibrate` or `variantcalling` steps for a specific sample. +Specify `--use_gatk_spark baserecalibrator` to use [GATK ApplyBQSRSpark](https://gatk.broadinstitute.org/hc/en-us/articles/5358898266011-ApplyBQSRSpark-BETA-) instead, the respective spark implementation. -### TSV files with `--sentieon` +The resulting recalibrated CRAM files are delivered to the user. Recalibrated CRAM files are usually 2-3 times larger than the duplicate-marked CRAM files. -> **WARNING** Only with [`--sentieon`](usage.md#--sentieon) +
+Output files for all samples + +**Output directory: `{outdir}/preprocessing/recalibrated//`** + +- `.recal.cram` and `.recal.cram.crai` + - CRAM file and index +- if `--save_output_as_bam`: + - `.recal.bam` and `.recal.bam.bai` - BAM file and index +
+ +### Parabricks FQ2BAM + +> [!NOTE] +> This is an experimental addition to the pipeline which is not at feature parity with the GATK implementation. + +[Parabricks FQ2BAM](https://docs.nvidia.com/clara/parabricks/latest/documentation/tooldocs/man_fq2bam.html) runs as an alternative to GATK preprocessing and is enabled with `--aligner parabricks --profile ,gpu`. + +The resulting recalibrated BAM (if `--save_output_as_bam`) or CRAM files are delivered to the user (if `--save_reference`). + +
+Output files for all samples -For all samples: +**Output directory: `{outdir}/preprocessing/parabricks//`** -**Output directory: `results/Preprocessing/TSV`** +- `.{bam,cram}` and `.{bam.bai,cram.crai}` + - BAM or CRAM file and index +
-- `sentieon_deduped.tsv` and `recalibrated_sentieon.tsv` - - `TSV` files to start `Sarek` from `variantcalling` step. -- `sentieon_deduped_[SAMPLE].tsv` and `recalibrated_sentieon_[SAMPLE].tsv` - - `TSV` files to start `Sarek` from `variantcalling` step for a specific sample. +### CSV files + +The CSV files are auto-generated and can be used by Sarek for further processing and/or variant calling. + +See the [`input`](usage#input-sample-sheet-configurations) section in the usage documentation for further reading and documentation on how to make the most of them. + +
+Output files: + +**Output directory: `{outdir}/preprocessing/csv`** + +- `mapped.csv` + - if `--save_mapped` + - CSV containing an entry for each sample with the columns `patient,sample,sex,status,bam,bai` +- `markduplicates_no_table.csv` + - CSV containing an entry for each sample with the columns `patient,sample,sex,status,cram,crai` +- `markduplicates.csv` + - CSV containing an entry for each sample with the columns `patient,sample,sex,status,cram,crai,table` +- `recalibrated.csv` + - CSV containing an entry for each sample with the columns `patient,sample,sex,status,cram,crai` +- `variantcalled.csv` + - CSV containing an entry for each sample with the columns `patient,sample,vcf` +
## Variant Calling -All the results regarding Variant Calling are collected in this directory. -If some results from a variant caller do not appear here, please check out the [`--tools`](usage.md#--tools) section in the usage documentation. +The results regarding variant calling are collected in `{outdir}/variant_calling/`. +If some results from a variant caller do not appear here, please check out the `--tools` section in the parameter [documentation](https://nf-co.re/sarek/latest/parameters). -`Recalibrated BAM` files can used as an input to start the Variant Calling. +(Recalibrated) CRAM files can be used as an input to start the variant calling. ### SNVs and small indels +For single nucleotide variants (SNVs) and small indels, multiple tools are available for normal (germline), tumor-only, and tumor-normal (somatic) paired data. For a list of the appropriate tool(s) for the data and sequencing type at hand, please check [here](usage#which-tool). + +#### bcftools + +[bcftools mpileup](https://samtools.github.io/bcftools/bcftools.html#mpileup) generates a pileup of a CRAM file, followed by [bcftools call](https://samtools.github.io/bcftools/bcftools.html#call); the calls are then filtered with `-i 'count(GT==\"RR\")==0'`. +For further reading and documentation see the [bcftools manual](https://samtools.github.io/bcftools/howtos/variant-calling.html). + +
+Output files for all samples + +**Output directory: `{outdir}/variant_calling/bcftools//`** + +- `.bcftools.vcf.gz` and `.bcftools.vcf.gz.tbi` + - VCF with tabix index + +
+ +#### DeepVariant + +[DeepVariant](https://github.com/google/deepvariant) is a deep learning-based variant caller that takes aligned reads, produces pileup image tensors from them, classifies each tensor using a convolutional neural network and finally reports the results in a standard VCF or gVCF file. For further documentation take a look [here](https://github.com/google/deepvariant/tree/r1.4/docs). + +
+Output files for normal samples + +**Output directory: `{outdir}/variant_calling/deepvariant//`** + +- `.deepvariant.vcf.gz` and `.deepvariant.vcf.gz.tbi` + - VCF with tabix index +- `.deepvariant.g.vcf.gz` and `.deepvariant.g.vcf.gz.tbi` + - gVCF with tabix index +
+ #### FreeBayes -[FreeBayes](https://github.com/ekg/freebayes) is a Bayesian genetic variant detector designed to find small polymorphisms, specifically SNPs, indels, MNPs, and complex events smaller than the length of a short-read sequencing alignment. +[FreeBayes](https://github.com/ekg/freebayes) is a Bayesian genetic variant detector designed to find small polymorphisms, specifically SNPs, indels, MNPs, and complex events smaller than the length of a short-read sequencing alignment. For further reading and documentation see the [FreeBayes manual](https://github.com/ekg/freebayes/blob/master/README.md#user-manual-and-guide). -For all samples: +
+Output files for all samples -**Output directory: `results/VariantCalling/[SAMPLE]/FreeBayes`** +**Output directory: `{outdir}/variant_calling/freebayes/{sample,normalsample_vs_tumorsample}/`** -- `FreeBayes_[SAMPLE].vcf.gz` and `FreeBayes_[SAMPLE].vcf.gz.tbi` - - `VCF` with Tabix index +- `.freebayes.vcf.gz` and `.freebayes.vcf.gz.tbi` + - VCF with tabix index -For further reading and documentation see the [FreeBayes manual](https://github.com/ekg/freebayes/blob/master/README.md#user-manual-and-guide). +
#### GATK HaplotypeCaller -[GATK HaplotypeCaller](https://gatk.broadinstitute.org/hc/en-us/articles/360042913231-HaplotypeCaller) calls germline SNPs and indels via local re-assembly of haplotypes. +[GATK HaplotypeCaller](https://gatk.broadinstitute.org/hc/en-us/articles/5358864757787-HaplotypeCaller) calls germline SNPs and indels via local re-assembly of haplotypes. + +
+Output files for normal samples -Germline calls are provided for all samples, to enable comparison of both, tumor and normal, for possible mixup. +**Output directory: `{outdir}/variant_calling/haplotypecaller//`** -For all samples: +- `.haplotypecaller.vcf.gz` and `.haplotypecaller.vcf.gz.tbi` + - VCF with tabix index + +
-**Output directory: `results/VariantCalling/[SAMPLE]/HaploTypeCaller`** +##### GATK Germline Single Sample Variant Calling -- `HaplotypeCaller_[SAMPLE].vcf.gz` and `HaplotypeCaller_[SAMPLE].vcf.gz.tbi` - - `VCF` with Tabix index +[GATK Single Sample Variant Calling](https://gatk.broadinstitute.org/hc/en-us/articles/360035535932-Germline-short-variant-discovery-SNPs-Indels-) +uses HaplotypeCaller in its default single-sample mode to call variants. The VCF that HaplotypeCaller emits errs on the side of sensitivity; the calls are therefore filtered by first running the [CNNScoreVariants](https://gatk.broadinstitute.org/hc/en-us/articles/5358904862107-CNNScoreVariants) tool. This tool annotates each variant with a score indicating the model's prediction of the quality of each variant. To apply filters based on those scores run the [FilterVariantTranches](https://gatk.broadinstitute.org/hc/en-us/articles/5358928898971-FilterVariantTranches) tool with SNP and INDEL sensitivity tranches appropriate for your task. -For further reading and documentation see the [HaplotypeCaller manual](https://gatk.broadinstitute.org/hc/en-us/articles/360042913231-HaplotypeCaller). +If the haplotype-called VCF files are not filtered, then Sarek should be run with at least one of the options `--dbsnp` or `--known_indels`. -#### GATK GenotypeGVCFs +
+Output files for normal samples -[GATK GenotypeGVCFs](https://gatk.broadinstitute.org/hc/en-us/articles/360042914991-GenotypeGVCFs) performs joint genotyping on one or more samples pre-called with HaplotypeCaller. +**Output directory: `{outdir}/variant_calling/haplotypecaller//`** -Germline calls are provided for all samples, to enable comparison of both, tumor and normal, for possible mixup. +- `.haplotypecaller.filtered.vcf.gz` and `.haplotypecaller.filtered.vcf.gz.tbi` + - VCF with tabix index -For all samples: +
-**Output directory: `results/VariantCalling/[SAMPLE]/HaplotypeCallerGVCF`** +##### GATK Joint Germline Variant Calling -- `HaplotypeCaller_[SAMPLE].g.vcf.gz` and `HaplotypeCaller_[SAMPLE].g.vcf.gz.tbi` - - `VCF` with Tabix index +[GATK Joint germline Variant Calling](https://gatk.broadinstitute.org/hc/en-us/articles/360035535932-Germline-short-variant-discovery-SNPs-Indels-) uses Haplotypecaller per sample in `gvcf` mode. Next, the gVCFs are consolidated from multiple samples into a [GenomicsDB](https://gatk.broadinstitute.org/hc/en-us/articles/5358869876891-GenomicsDBImport) datastore. After joint [genotyping](https://gatk.broadinstitute.org/hc/en-us/articles/5358906861083-GenotypeGVCFs), [VQSR](https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator) is applied for filtering to produce the final multisample callset with the desired balance of precision and sensitivity. -For further reading and documentation see the [GenotypeGVCFs manual](https://gatk.broadinstitute.org/hc/en-us/articles/360042914991-GenotypeGVCFs). +
+Output files from joint germline variant calling -#### GATK Mutect2 +**Output directory: `{outdir}/variant_calling/haplotypecaller//`** -[GATK Mutect2](https://gatk.broadinstitute.org/hc/en-us/articles/360042477952-Mutect2) calls somatic SNVs and indels via local assembly of haplotypes. +- `.haplotypecaller.g.vcf.gz` and `.haplotypecaller.g.vcf.gz.tbi` + - gVCF with tabix index -For further reading and documentation see the [Mutect2 manual](https://gatk.broadinstitute.org/hc/en-us/articles/360042477952-Mutect2). -It is recommended to have [panel of normals (PON)](https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON) for this version of `GATK Mutect2` using at least 40 normal samples. -Additionally, you can add your `PON` file to get filtered somatic calls. +**Output directory: `{outdir}/variant_calling/haplotypecaller/joint_variant_calling/`** -For a Tumor/Normal pair: +- `joint_germline.vcf.gz` and `joint_germline.vcf.gz.tbi` + - VCF with tabix index +- `joint_germline_recalibrated.vcf.gz` and `joint_germline_recalibrated.vcf.gz.tbi` + - variant recalibrated VCF with tabix index (if VQSR is applied) -**Output directory: `results/VariantCalling/[TUMOR_vs_NORMAL]/Mutect2`** +
+ +#### GATK Mutect2 + +[GATK Mutect2](https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2) calls somatic SNVs and indels via local assembly of haplotypes. +When `--joint_mutect2` is used, Mutect2 subworkflow outputs will be saved in a subfolder named with the patient ID and `{patient}.mutect2.vcf.gz` file will contain variant calls from all of the normal and tumor samples of the patient. +For further reading and documentation see the [Mutect2 manual](https://gatk.broadinstitute.org/hc/en-us/articles/360035531132). +It is not required, but recommended to have a [panel of normals (PON)](https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON) using at least 40 normal samples to get filtered somatic calls. When using `--genome GATK.GRCh38`, a panel-of-normals file is available. However, it is _highly_ recommended to create one matching your tumor samples. Creating your own panel-of-normals is currently not natively supported by the pipeline. See [here](https://gatk.broadinstitute.org/hc/en-us/articles/360035531132) for how to create one manually. + +
+Output files for tumor-only and tumor/normal paired samples + +**Output directory: `{outdir}/variant_calling/mutect2/{sample,tumorsample_vs_normalsample,patient}/`** Files created: -- `Mutect2_unfiltered_[TUMORSAMPLE]_vs_[NORMALSAMPLE].vcf.gz` and `Mutect2_unfiltered_[TUMORSAMPLE]_vs_[NORMALSAMPLE].vcf.gz.tbi` - - unfiltered (raw) Mutect2 calls `VCF` with Tabix index -- `Mutect2_filtered_[TUMORSAMPLE]_vs_[NORMALSAMPLE].vcf.gz` and `Mutect2_filtered_[TUMORSAMPLE]_vs_[NORMALSAMPLE].vcf.gz.tbi` - - filtered Mutect2 calls `VCF` with Tabix index: these entries have a `PASS` filter, you can get these when supplying a panel of normals using the `--pon` option -- `[TUMORSAMPLE]_vs_[NORMALSAMPLE].vcf.gz.stats` - - a stats file generated during calling of raw variants (needed for filtering) -- `[TUMORSAMPLE]_contamination.table` - - a text file exported when panel-of-normals about sample contamination are provided +- `{sample,tumorsample_vs_normalsample,patient}.mutect2.vcf.gz` and `{sample,tumorsample_vs_normalsample,patient}.mutect2.vcf.gz.tbi` + - unfiltered (raw) Mutect2 calls VCF with tabix index +- `{sample,tumorsample_vs_normalsample,patient}.mutect2.vcf.gz.stats` + - a stats file generated during calling of raw variants (needed for filtering) +- `{sample,tumorsample_vs_normalsample}.mutect2.contamination.table` + - table calculating the fraction of reads coming from cross-sample contamination +- `{sample,tumorsample_vs_normalsample}.mutect2.segmentation.table` + - table containing segmentation of the tumor by minor allele fraction +- `{sample,tumorsample_vs_normalsample,patient}.mutect2.artifactprior.tar.gz` + - prior probabilities for read orientation artifacts +- `{sample,tumorsample,normalsample}.mutect2.pileups.table` + - tabulates pileup metrics for inferring contamination +- `{sample,tumorsample_vs_normalsample,patient}.mutect2.filtered.vcf.gz` and `{sample,tumorsample_vs_normalsample,patient}.mutect2.filtered.vcf.gz.tbi` + - filtered Mutect2 calls VCF with 
tabix index based on the probability that a variant is somatic +- `{sample,tumorsample_vs_normalsample,patient}.mutect2.filtered.vcf.gz.filteringStats.tsv` + - a stats file generated during the filtering of Mutect2 called variants -#### samtools mpileup +
-[samtools mpileup](https://www.htslib.org/doc/samtools.html) generates pileup of a `BAM` file. +#### Lofreq -For all samples: +[Lofreq](https://github.com/CSB5/lofreq) is a fast and sensitive variant-caller for inferring SNVs and indels from next-generation sequencing data. It makes full use of base-call qualities and other sources of errors inherent in sequencing, which are usually ignored by other methods or only used for filtering. For further reading and documentation see the [Lofreq user guide](https://csb5.github.io/lofreq/). -**Output directory: `results/VariantCalling/[SAMPLE]/mpileup`** +
+Output files for tumor-only samples -- `[SAMPLE].pileup.gz` - - The pileup format is a text-based format for summarizing the base calls of aligned reads to a reference sequence. Alignment records are grouped by sample (`SM`) identifiers in `@RG` header lines. +**Output directory: `{outdir}/variant_calling/lofreq//`** -For further reading and documentation see the [samtools manual](https://www.htslib.org/doc/samtools.html#COMMANDS_AND_OPTIONS). +- `.vcf.gz` + - VCF which provides a detailed description of the detected genetic variants. -#### Strelka2 +
-[Strelka2](https://github.com/Illumina/strelka) is a fast and accurate small variant caller optimized for analysis of germline variation in small cohorts and somatic variation in tumor/normal sample pairs. +#### MuSE -For all samples: +[MuSE](https://github.com/wwylab/MuSE) is an accurate and ultra-fast somatic mutation calling tool for whole-genome sequencing (WGS) and whole-exome sequencing (WES) data from heterogeneous tumor samples. This tool is unique in accounting for tumor heterogeneity using a sample-specific error model that improves sensitivity and specificity in mutation calling from sequencing data. For further reading see the [recently published paper](https://genome.cshlp.org/content/early/2024/05/03/gr.278456.123.long). -**Output directory: `results/VariantCalling/[SAMPLE]/Strelka`** +
+Output files for tumor-normal samples -- `Strelka_Sample_genome.vcf.gz` and `Strelka_Sample_genome.vcf.gz.tbi` - - `VCF` with Tabix index -- `Strelka_Sample_variants.vcf.gz` and `Strelka_Sample_variants.vcf.gz.tbi` - - `VCF` with Tabix index +**Output directory: `{outdir}/variant_calling/muse//`** -For a Tumor/Normal pair: +- `.MuSE.txt` + - TXT containing position-specific summary statistics. +- `.muse.vcf.gz` + - VCF with called variants. Fields are named TUMOR and NORMAL. -**Output directory: `results/VariantCalling/[TUMOR_vs_NORMAL]/Strelka`** +
-- `Strelka_[TUMORSAMPLE]_vs_[NORMALSAMPLE]_somatic_indels.vcf.gz` and `Strelka_[TUMORSAMPLE]_vs_[NORMALSAMPLE]_somatic_indels.vcf.gz.tbi` - - `VCF` with Tabix index -- `Strelka_[TUMORSAMPLE]_vs_[NORMALSAMPLE]_somatic_snvs.vcf.gz` and `Strelka_[TUMORSAMPLE]_vs_[NORMALSAMPLE]_somatic_snvs.vcf.gz.tbi` - - `VCF` with Tabix index +#### Sentieon DNAscope -Using [Strelka Best Practices](https://github.com/Illumina/strelka/blob/master/docs/userGuide/README.md#somatic-configuration-example) with the `candidateSmallIndels` from `Manta`: +[Sentieon DNAscope](https://support.sentieon.com/appnotes/dnascope_ml/#dnascope-germline-variant-calling-with-a-machine-learning-model) is a variant-caller which aims at outperforming GATK's Haplotypecaller in terms of both speed and accuracy. DNAscope allows you to use a machine learning model to perform variant calling with higher accuracy by improving the candidate detection and filtering. -**Output directory: `results/VariantCalling/[TUMOR_vs_NORMAL]/Strelka`** +
+Unfiltered VCF-files for normal samples -- `StrelkaBP_[TUMORSAMPLE]_vs_[NORMALSAMPLE]_somatic_indels.vcf.gz` and `StrelkaBP_[TUMORSAMPLE]_vs_[NORMALSAMPLE]_somatic_indels.vcf.gz.tbi` - - `VCF` with Tabix index -- `StrelkaBP_[TUMORSAMPLE]_vs_[NORMALSAMPLE]_somatic_snvs.vcf.gz` and `StrelkaBP_[TUMORSAMPLE]_vs_[NORMALSAMPLE]_somatic_snvs.vcf.gz.tbi` - - `VCF` with Tabix index +**Output directory: `{outdir}/variant_calling/sentieon_dnascope//`** -For further reading and documentation see the [Strelka2 user guide](https://github.com/Illumina/strelka/blob/master/docs/userGuide/README.md). +- `.dnascope.unfiltered.vcf.gz` and `.dnascope.unfiltered.vcf.gz.tbi` + - VCF with tabix index -#### Sentieon DNAseq +
-> **WARNING** Only with [`--sentieon`](usage.md#--sentieon) +The output from Sentieon's DNAscope can be controlled through the option `--sentieon_dnascope_emit_mode` for Sarek, see [Basic usage of Sentieon functions](#basic-usage-of-sentieon-functions). -[Sentieon DNAseq](https://www.sentieon.com/products/#dnaseq) implements the same mathematics used in the Broad Institute's BWA-GATK HaplotypeCaller 3.3-4.1 Best Practices Workflow pipeline. +Unless `dnascope_filter` is listed under `--skip_tools` in the nextflow command, Sentieon's [DNAModelApply](https://support.sentieon.com/manual/usages/general/#dnamodelapply-algorithm) is applied to the unfiltered VCF-files in order to obtain filtered VCF-files. -For all samples: +
+Filtered VCF-files for normal samples -**Output directory: `results/VariantCalling/[SAMPLE]/SentieonDNAseq`** +**Output directory: `{outdir}/variant_calling/sentieon_dnascope//`** -- `DNAseq_Sample.vcf.gz` and `DNAseq_Sample.vcf.gz.tbi` - - `VCF` with Tabix index +- `.dnascope.filtered.vcf.gz` and `.dnascope.filtered.vcf.gz.tbi` + - VCF with tabix index -For further reading and documentation see the [Sentieon DNAseq user guide](https://support.sentieon.com/manual/DNAseq_usage/dnaseq/). +
-#### Sentieon DNAscope +##### Sentieon DNAscope joint germline variant calling + +In Sentieon's package DNAscope, joint germline variant calling is done by first running Sentieon's DNAscope in emit-mode `gvcf` for each sample and then running Sentieon's [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcftyper-algorithm) on the set of gVCF-files. See [Basic usage of Sentieon functions](#basic-usage-of-sentieon-functions) for information on how joint germline variant calling can be done in Sarek using Sentieon's DNAscope. + +
+Output files from joint germline variant calling + +**Output directory: `{outdir}/variant_calling/sentieon_dnascope//`** + +- `.dnascope.g.vcf.gz` and `.dnascope.g.vcf.gz.tbi` + - VCF with tabix index + +**Output directory: `{outdir}/variant_calling/sentieon_dnascope/joint_variant_calling/`** + +- `joint_germline.vcf.gz` and `joint_germline.vcf.gz.tbi` + - VCF with tabix index + +
+ +#### Sentieon Haplotyper + +[Sentieon Haplotyper](https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm) is Sentieon's sped-up version of GATK's HaplotypeCaller (see above). + +
+Unfiltered VCF-files for normal samples + +**Output directory: `{outdir}/variant_calling/sentieon_haplotyper//`** -> **WARNING** Only with [`--sentieon`](usage.md#--sentieon) +- `.haplotyper.unfiltered.vcf.gz` and `.haplotyper.unfiltered.vcf.gz.tbi` + - VCF with tabix index -[Sentieon DNAscope](https://www.sentieon.com/products) calls SNPs and small indels. +
+ +The output from Sentieon's Haplotyper can be controlled through the option `--sentieon_haplotyper_emit_mode` for Sarek, see [Basic usage of Sentieon functions](#basic-usage-of-sentieon-functions). + +Unless `haplotyper_filter` is listed under `--skip_tools` in the nextflow command, GATK's CNNScoreVariants and FilterVariantTranches (see above) are applied to the unfiltered VCF-files in order to obtain filtered VCF-files. + +
+Filtered VCF-files for normal samples -For all samples: +**Output directory: `{outdir}/variant_calling/sentieon_haplotyper//`** -**Output directory: `results/VariantCalling/[SAMPLE]/SentieonDNAscope`** +- `.haplotyper.filtered.vcf.gz` and `.haplotyper.filtered.vcf.gz.tbi` + - VCF with tabix index -- `DNAscope_Sample.vcf.gz` and `DNAscope_Sample.vcf.gz.tbi` - - `VCF` with Tabix index +
+ +##### Sentieon Haplotyper joint germline variant calling + +In Sentieon's package DNAseq, joint germline variant calling is done by first running Sentieon's Haplotyper in emit-mode `gvcf` for each sample and then running Sentieon's [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcftyper-algorithm) on the set of gVCF-files. See [Basic usage of Sentieon functions](#basic-usage-of-sentieon-functions) for information on how joint germline variant calling can be done in Sarek using Sentieon's DNAseq. After joint genotyping, Sentieon's version of VQSR ([VarCal](https://support.sentieon.com/manual/usages/general/#varcal-algorithm) and [ApplyVarCal](https://support.sentieon.com/manual/usages/general/#applyvarcal-algorithm)) is applied for filtering to produce the final multisample callset with the desired balance of precision and sensitivity. + +
+Output files from joint germline variant calling -For further reading and documentation see the [Sentieon DNAscope user guide](https://support.sentieon.com/manual/DNAscope_usage/dnascope/). +**Output directory: `{outdir}/variant_calling/sentieon_haplotyper//`** + +- `.haplotyper.g.vcf.gz` and `.haplotyper.g.vcf.gz.tbi` + - VCF with tabix index + +**Output directory: `{outdir}/variant_calling/sentieon_haplotyper/joint_variant_calling/`** + +- `joint_germline.vcf.gz` and `joint_germline.vcf.gz.tbi` + - VCF with tabix index +- `joint_germline_recalibrated.vcf.gz` and `joint_germline_recalibrated.vcf.gz.tbi` + - variant recalibrated VCF with tabix index (if VarCal is applied) + +
#### Sentieon TNscope -> **WARNING** Only with [`--sentieon`](usage.md#--sentieon) +[Sentieon TNscope](https://support.sentieon.com/manual/usages/general/#tnscope-algorithm) is Sentieon's proprietary somatic variant and structural variant caller. -[Sentieon TNscope](https://www.sentieon.com/products/#tnscope) calls SNPs and small indels on an Tumor/Normal pair. +
+VCF-files for tumor-only and tumor/normal samples -For a Tumor/Normal pair: +**Output directory: `{outdir}/variant_calling/sentieon_tnscope//`** -**Output directory: `results/VariantCalling/[TUMOR_vs_NORMAL]/SentieonTNscope`** +- `.tnscope.vcf.gz` and `.tnscope.vcf.gz.tbi` + - VCF with tabix index -- `TNscope_[TUMORSAMPLE]_vs_[NORMALSAMPLE].vcf.gz` and `TNscope_[TUMORSAMPLE]_vs_[NORMALSAMPLE].vcf.gz.tbi` - - `VCF` with Tabix index +
-For further reading and documentation see the [Sentieon TNscope user guide](https://support.sentieon.com/manual/TNscope_usage/tnscope/). +#### Strelka -### Structural Variants +[Strelka](https://github.com/Illumina/strelka) is a fast and accurate small variant caller optimized for analysis of germline variation in small cohorts and somatic variation in tumor/normal sample pairs. For further reading and documentation see the [Strelka user guide](https://github.com/Illumina/strelka/blob/master/docs/userGuide/README.md). If [Strelka](https://github.com/Illumina/strelka) is used for somatic variant calling and [Manta](https://github.com/Illumina/manta) is also specified in tools, the output candidate indels from [Manta](https://github.com/Illumina/manta) are used according to [Strelka Best Practices](https://github.com/Illumina/strelka/blob/master/docs/userGuide/README.md#somatic-configuration-example). +For further downstream analysis, take a look [here](https://github.com/Illumina/strelka/blob/v2.9.x/docs/userGuide/README.md#interpreting-the-germline-multi-sample-variants-vcf). -#### Manta +
+Output files for single samples (normal) -[Manta](https://github.com/Illumina/manta) calls structural variants (SVs) and indels from mapped paired-end sequencing reads. -It is optimized for analysis of germline variation in small sets of individuals and somatic variation in tumor/normal sample pairs. -`Manta` provides a candidate list for small indels that can be fed to `Strelka` following [Strelka Best Practices](https://github.com/Illumina/strelka/blob/master/docs/userGuide/README.md#somatic-configuration-example). +**Output directory: `{outdir}/variant_calling/strelka//`** -For all samples: +- `.strelka.genome.vcf.gz` and `.strelka.genome.vcf.gz.tbi` + - genome VCF with tabix index +- `.strelka.variants.vcf.gz` and `.strelka.variants.vcf.gz.tbi` + - VCF with tabix index with all potential variant loci across the sample. Note this file includes non-variant loci if they have a non-trivial level of variant evidence or contain one or more alleles for which genotyping has been forced. +
+ +
+Output files for tumor/normal paired samples -**Output directory: `results/VariantCalling/[SAMPLE]/Manta`** +**Output directory: `{outdir}/variant_calling/strelka//`** -- `Manta_[SAMPLE].candidateSmallIndels.vcf.gz` and `Manta_[SAMPLE].candidateSmallIndels.vcf.gz.tbi` - - `VCF` with Tabix index -- `Manta_[SAMPLE].candidateSV.vcf.gz` and `Manta_[SAMPLE].candidateSV.vcf.gz.tbi` - - `VCF` with Tabix index +- `.strelka.somatic_indels.vcf.gz` and `.strelka.somatic_indels.vcf.gz.tbi` + - VCF with tabix index with all somatic indels inferred in the tumor sample. +- `.strelka.somatic_snvs.vcf.gz` and `.strelka.somatic_snvs.vcf.gz.tbi` + - VCF with tabix index with all somatic SNVs inferred in the tumor sample. -For Normal sample only: +
-- `Manta_[NORMALSAMPLE].diploidSV.vcf.gz` and `Manta_[NORMALSAMPLE].diploidSV.vcf.gz.tbi` - - `VCF` with Tabix index +### Structural Variants -For a Tumor sample only: +#### indexcov -- `Manta_[TUMORSAMPLE].tumorSV.vcf.gz` and `Manta_[TUMORSAMPLE].tumorSV.vcf.gz.tbi` - - `VCF` with Tabix index +[indexcov](https://github.com/brentp/goleft/tree/master/indexcov) quickly estimates coverage from a whole-genome bam or cram index. +A bam index has 16KB resolution and it is used as a coverage estimate. +The output is scaled to around 1. So a long stretch with values of 1.5 would be a heterozygous duplication. This is useful as a quick QC to get coverage values across the genome. -For a Tumor/Normal pair: +**Output directory: `{outdir}/variant_calling/indexcov/`** -**Output directory: `results/VariantCalling/[TUMOR_vs_NORMAL]/Manta`** +In addition to the interactive HTML files, `indexcov` outputs a number of text files: -- `Manta_[TUMORSAMPLE]_vs_[NORMALSAMPLE].candidateSmallIndels.vcf.gz` and `Manta_[TUMORSAMPLE]_vs_[NORMALSAMPLE].candidateSmallIndels.vcf.gz.tbi` - - `VCF` with Tabix index -- `Manta_[TUMORSAMPLE]_vs_[NORMALSAMPLE].candidateSV.vcf.gz` and `Manta_[TUMORSAMPLE]_vs_[NORMALSAMPLE].candidateSV.vcf.gz.tbi` - - `VCF` with Tabix index -- `Manta_[TUMORSAMPLE]_vs_[NORMALSAMPLE].diploidSV.vcf.gz` and `Manta_[TUMORSAMPLE]_vs_[NORMALSAMPLE].diploidSV.vcf.gz.tbi` - - `VCF` with Tabix index -- `Manta_[TUMORSAMPLE]_vs_[NORMALSAMPLE].somaticSV.vcf.gz` and `Manta_[TUMORSAMPLE]_vs_[NORMALSAMPLE].somaticSV.vcf.gz.tbi` - - `VCF` with Tabix index +- `-indexcov.ped`: a .ped/.fam file with the inferred sex in the appropriate column if the sex chromosomes were found. + the CNX and CNY columns indicating the floating-point estimate of copy-number for those chromosomes. + `bins.out`: how many bins had a coverage value outside of (0.85, 1.15). high values can indicate high-bias samples. + `bins.lo`: number of bins with value < 0.15. high values indicate missing data.
+ `bins.hi`: number of bins with value > 1.15. + `bins.in`: number of bins with value inside of (0.85, 1.15) + `p.out`: `bins.out/bins.in` + `PC1...PC5`: PCA projections calculated with depth of autosomes. -For further reading and documentation see the [Manta user guide](https://github.com/Illumina/manta/blob/master/docs/userGuide/README.md). +- `-indexcov.roc`: tab-delimited columns of chrom, scaled coverage cutoff, and $n_samples columns where each indicates the + proportion of 16KB blocks at or above that scaled coverage value. +- `-indexcov.bed.gz`: a bed file with columns of chrom, start, end, and a column per sample where the values indicate the + scaled coverage for that sample in that 16KB chunk. -#### TIDDIT +#### Manta -[TIDDIT](https://github.com/SciLifeLab/TIDDIT) identifies intra and inter-chromosomal translocations, deletions, tandem-duplications and inversions. +[Manta](https://github.com/Illumina/manta) calls structural variants (SVs) and indels from mapped paired-end sequencing reads. +It is optimized for analysis of germline variation in small sets of individuals and somatic variation in tumor/normal sample pairs. +[Manta](https://github.com/Illumina/manta) provides a candidate list for small indels that can be fed to [Strelka](https://github.com/Illumina/strelka) following [Strelka Best Practices](https://github.com/Illumina/strelka/blob/master/docs/userGuide/README.md#somatic-configuration-example). For further reading and documentation see the [Manta user guide](https://github.com/Illumina/manta/blob/master/docs/userGuide/README.md). -Germline calls are provided for all samples, to enable comparison of both, tumor and normal, for possible mixup. -Low quality calls are removed internally, to simplify processing of variant calls but they are saved by `Sarek`. +
+Output files for normal samples -For all samples: +**Output directory: `{outdir}/variant_calling/manta//`** -**Output directory: `results/VariantCalling/[SAMPLE]/TIDDIT`** +- `.manta.diploid_sv.vcf.gz` and `.manta.diploid_sv.vcf.gz.tbi` + - VCF with tabix index containing SVs and indels scored and genotyped under a diploid model for the sample. +
-- `TIDDIT_[SAMPLE].vcf.gz` and `TIDDIT_[SAMPLE].vcf.gz.tbi` - - `VCF` with Tabix index -- `TIDDIT_[SAMPLE].signals.tab` - - tab file describing coverage across the genome, binned per 50 bp -- `TIDDIT_[SAMPLE].ploidy.tab` - - tab file describing the estimated ploidy and coverage across each contig -- `TIDDIT_[SAMPLE].old.vcf` - - `VCF` including the low qualiy calls -- `TIDDIT_[SAMPLE].wig` - - wiggle file containing coverage across the genome, binned per 50 bp -- `TIDDIT_[SAMPLE].gc.wig` - - wiggle file containing fraction of gc content, binned per 50 bp +
+Output files for tumor-only samples -For further reading and documentation see the [TIDDIT manual](https://github.com/SciLifeLab/TIDDIT/blob/master/README.md). +**Output directory: `{outdir}/variant_calling/manta//`** -#### Sentieon DNAscope SV +- `.manta.tumor_sv.vcf.gz` and `.manta.tumor_sv.vcf.gz.tbi` + - VCF with tabix index containing a subset of the candidateSV.vcf.gz file after removing redundant candidates and small indels less than the minimum scored variant size (50 by default). The SVs are not scored, but include additional details: (1) paired and split read supporting evidence counts for each allele (2) a subset of the filters from the scored tumor-normal model are applied to the single tumor case to improve precision. +
-> **WARNING** Only with [`--sentieon`](usage.md#--sentieon) +
+Output files for tumor/normal paired samples -[Sentieon DNAscope](https://www.sentieon.com/products) can perform structural variant calling in addition to calling SNPs and small indels. +**Output directory: `{outdir}/variant_calling/manta//`** -For all samples: +- `.manta.diploid_sv.vcf.gz` and `.manta.diploid_sv.vcf.gz.tbi` + - VCF with tabix index containing SVs and indels scored and genotyped under a diploid model for the sample. In the case of a tumor/normal subtraction, the scores in this file do not reflect any information from the tumor sample. +- `.manta.somatic_sv.vcf.gz` and `.manta.somatic_sv.vcf.gz.tbi` + - VCF with tabix index containing SVs and indels scored under a somatic variant model. +
-**Output directory: `results/VariantCalling/[SAMPLE]/SentieonDNAscope`** +#### TIDDIT -- `DNAscope_SV_Sample.vcf.gz` and `DNAscope_SV_Sample.vcf.gz.tbi` - - `VCF` with Tabix index +[TIDDIT](https://github.com/SciLifeLab/TIDDIT) identifies intra and inter-chromosomal translocations, deletions, tandem-duplications and inversions. For further reading and documentation see the [TIDDIT manual](https://github.com/SciLifeLab/TIDDIT/blob/master/README.md). -For further reading and documentation see the [Sentieon DNAscope user guide](https://support.sentieon.com/manual/DNAscope_usage/dnascope/). +
+Output files for normal and tumor-only samples -### Sample heterogeneity, ploidy and CNVs +**Output directory: `{outdir}/variant_calling/tiddit//`** -#### ConvertAlleleCounts +- `.tiddit.vcf.gz` and `.tiddit.vcf.gz.tbi` + - VCF with tabix index containing SV calls +- `.tiddit.ploidies.tab` + - tab file describing the estimated ploidy and coverage across each contig -Running ASCAT on NGS data requires that the `BAM` files are converted into BAF and LogR values. -This can be done using the software [AlleleCount](https://github.com/cancerit/alleleCount) followed by the provided [ConvertAlleleCounts](https://github.com/nf-core/sarek/blob/master/bin/convertAlleleCounts.r) R-script. +
-For a Tumor/Normal pair: +
+Output files for tumor/normal paired samples -**Output directory: `results/VariantCalling/[TUMOR_vs_NORMAL]/ASCAT`** +**Output directory: `{outdir}/variant_calling/tiddit//`** -- `[TUMORSAMPLE].BAF` and `[NORMALSAMPLE].BAF` - - file with beta allele frequencies -- `[TUMORSAMPLE].LogR` and `[NORMALSAMPLE].LogR` - - file with total copy number on a logarithmic scale +- `.tiddit.normal.vcf.gz` and `.tiddit.normal.vcf.gz.tbi` + - VCF with tabix index containing SV calls +- `.tiddit.tumor.vcf.gz` and `.tiddit.tumor.vcf.gz.tbi` + - VCF with tabix index containing SV calls +- `_sv_merge.tiddit.vcf.gz` and `_sv_merge.tiddit.vcf.gz.tbi` + - merged tumor/normal VCF with tabix index +- `.tiddit.ploidies.tab` + - tab file describing the estimated ploidy and coverage across each contig + +
+ +### Sample heterogeneity, ploidy and CNVs #### ASCAT -[ASCAT](https://github.com/Crick-CancerGenomics/ascat) is a software for performing allele-specific copy number analysis of tumor samples and for estimating tumor ploidy and purity (normal contamination). +[ASCAT](https://github.com/VanLoo-lab/ascat) is a software for performing allele-specific copy number analysis of tumor samples and for estimating tumor ploidy and purity (normal contamination). It infers tumor purity and ploidy and calculates whole-genome allele-specific copy number profiles. -`ASCAT` is written in `R` and available here: [github.com/Crick-CancerGenomics/ascat](https://github.com/Crick-CancerGenomics/ascat). -The `ASCAT` process gives several images as output, described in detail in this [book chapter](http://www.ncbi.nlm.nih.gov/pubmed/22130873). - -For a Tumor/Normal pair: - -**Output directory: `results/VariantCalling/[TUMOR_vs_NORMAL]/ASCAT`** - -- `[TUMORSAMPLE].aberrationreliability.png` - - Image with information about aberration reliability -- `[TUMORSAMPLE].ASCATprofile.png` - - Image with information about ASCAT profile -- `[TUMORSAMPLE].ASPCF.png` - - Image with information about ASPCF -- `[TUMORSAMPLE].rawprofile.png` - - Image with information about raw profile -- `[TUMORSAMPLE].sunrise.png` - - Image with information about sunrise -- `[TUMORSAMPLE].tumour.png` - - Image with information about tumor -- `[TUMORSAMPLE].cnvs.txt` - - file with information about CNVS -- `[TUMORSAMPLE].LogR.PCFed.txt` - - file with information about LogR -- `[TUMORSAMPLE].purityploidy.txt` - - file with information about purity ploidy - -The text file `[TUMORSAMPLE].cnvs.txt` countains predictions about copy number state for all the segments. +The [ASCAT](https://github.com/VanLoo-lab/ascat) process gives several images as output, described in detail in this [book chapter](http://www.ncbi.nlm.nih.gov/pubmed/22130873). 
+Running ASCAT on NGS data requires that the BAM files are converted into BAF and LogR values. +This is done internally using the software [AlleleCount](https://github.com/cancerit/alleleCount). For further reading and documentation see the [ASCAT manual](https://www.crick.ac.uk/research/labs/peter-van-loo/software). + +
+Output files for tumor/normal paired samples + +**Output directory: `{outdir}/variant_calling/ascat//`** + +- `.tumour.ASCATprofile.png` + - image with information about allele-specific copy number profile +- `.tumour.ASPCF.png` + - image with information about allele-specific copy number segmentation +- `.before_correction_Tumour..tumour.png` + - image with information about raw profile of tumor sample of logR and BAF values before GC correction +- `.before_correction_Tumour..germline.png` + - image with information about raw profile of normal sample of logR and BAF values before GC correction +- `.after_correction_GC_Tumour..tumour.png` + - image with information about GC and RT corrected logR and BAF values of tumor sample after GC correction +- `.after_correction_GC_Tumour..germline.png` + - image with information about GC and RT corrected logR and BAF values of normal sample after GC correction +- `.tumour.sunrise.png` + - image visualising the range of ploidy and tumor percentage values +- `.metrics.txt` + - file with information about different metrics from ASCAT profiles +- `.cnvs.txt` + - file with information about CNVS +- `.purityploidy.txt` + - file with information about purity and ploidy +- `.segments.txt` + - file with information about copy number segments +- `.tumour_tumourBAF.txt` and `.tumour_normalBAF.txt` + - file with beta allele frequencies +- `.tumour_tumourLogR.txt` and `.tumour_normalLogR.txt` + - file with total copy number on a logarithmic scale + +The text file `.cnvs.txt` contains predictions about copy number state for all the segments. 
The output is a tab delimited text file with the following columns: -- *chr*: chromosome number -- *startpos*: start position of the segment -- *endpos*: end position of the segment -- *nMajor*: number of copies of one of the allels (for example the chromosome inherited from the father) -- *nMinor*: number of copies of the other allele (for example the chromosome inherited of the mother) +- _chr_: chromosome number +- _startpos_: start position of the segment +- _endpos_: end position of the segment +- _nMajor_: number of copies of one of the alleles (for example the chromosome inherited from one parent) +- _nMinor_: number of copies of the other allele (for example the chromosome inherited from the other parent) -The file `[TUMORSAMPLE].cnvs.txt` contains all segments predicted by ASCAT, both those with normal copy number (nMinor = 1 and nMajor =1) and those corresponding to copy number aberrations. +The file `.cnvs.txt` contains all segments predicted by ASCAT, both those with normal copy number (nMinor = 1 and nMajor = 1) and those corresponding to copy number aberrations. -For further reading and documentation see the [ASCAT manual](https://www.crick.ac.uk/research/labs/peter-van-loo/software). +
+ +#### CNVKit + +[CNVKit](https://cnvkit.readthedocs.io/en/stable/) is a toolkit to infer and visualize copy number from high-throughput DNA sequencing data. It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina. For further reading and documentation, see the [CNVKit Documentation](https://cnvkit.readthedocs.io/en/stable/plots.html) + +
+Output files for normal and tumor-only samples + +**Output directory: `{outdir}/variant_calling/cnvkit//`** + +- `.antitargetcoverage.cnn` + - file containing coverage information +- `.targetcoverage.cnn` + - file containing coverage information +- `-diagram.pdf` + - file with plot of copy numbers or segments on chromosomes +- `-scatter.png` + - file with plot of bin-level log2 coverages and segmentation calls +- `.bintest.cns` + - file containing copy number segment information +- `.cnr` + - file containing copy number ratio information +- `.cns` + - file containing copy number segment information +- `.call.cns` + - file containing copy number segment information +- `.genemetrics.tsv` + - file containing per gene copy number information (if input files are annotated) +
+ +
+Output files for tumor/normal samples + +**Output directory: `{outdir}/variant_calling/cnvkit//`** + +- `.antitargetcoverage.cnn` + - file containing coverage information +- `.targetcoverage.cnn` + - file containing coverage information +- `.antitargetcoverage.cnn` + - file containing coverage information +- `.targetcoverage.cnn` + - file containing coverage information +- `.bintest.cns` + - file containing copy number segment information +- `-scatter.png` + - file with plot of bin-level log2 coverages and segmentation calls +- `-diagram.pdf` + - file with plot of copy numbers or segments on chromosomes +- `.cnr` + - file containing copy number ratio information +- `.cns` + - file containing copy number segment information +- `.call.cns` + - file containing copy number segment information +- `.genemetrics.tsv` + - file containing per gene copy number information (if input files are annotated) +
#### Control-FREEC [Control-FREEC](https://github.com/BoevaLab/FREEC) is a tool for detection of copy-number changes and allelic imbalances (including loss of heterozygosity (LOH)) using deep-sequencing data. -`Control-FREEC` automatically computes, normalizes, segments copy number and beta allele frequency profiles, then calls copy number alterations and LOH. -And also detects subclonal gains and losses and evaluate the most likely average ploidy of the sample. +[Control-FREEC](https://github.com/BoevaLab/FREEC) automatically computes, normalizes, segments copy number and beta allele frequency profiles, then calls copy number alterations and LOH. +It also detects subclonal gains and losses and evaluates the most likely average ploidy of the sample. For further reading and documentation see the [Control-FREEC Documentation](http://boevalab.inf.ethz.ch/FREEC/tutorial.html). + +
+Output files for tumor-only and tumor/normal paired samples + +**Output directory: `{outdir}/variant_calling/controlfreec/{tumorsample,tumorsample_vs_normalsample}/`** + +- `config.txt` + - Configuration file used to run Control-FREEC +- `_BAF.png` and `_BAF.png` + - image of BAF plot +- `_ratio.log2.png` and `_ratio.log2.png` + - image of ratio log2 plot +- `_ratio.png` and `_ratio.png` + - image of ratio plot +- `.bed` and `.bed` + - translated output to a .BED file (so to view it in the UCSC Genome Browser) +- `.circos.txt` and `.circos.txt` + - translated output to the Circos format +- `.p.value.txt` and `.p.value.txt` + - CNV file containing p_values for each call +- `_BAF.txt` and `.mpileup.gz_BAF.txt` + - file with beta allele frequencies for each possibly heterozygous SNP position +- `.tumor.mpileup.gz_CNVs` + - file with coordinates of predicted copy number alterations +- `_info.txt` and `.tumor.mpileup.gz_info.txt` + - parsable file with information about FREEC run +- ` _ratio.BedGraph` and `.tumor.mpileup.gz_ratio.BedGraph ` + - file with ratios in BedGraph format for visualization in the UCSC genome browser. The file contains tracks for normal copy number, gains and losses, and copy neutral LOH (\*). +- `_ratio.txt` and `.tumor.mpileup.gz_ratio.txt` + - file with ratios and predicted copy number alterations for each window +- `_sample.cpn` and `.tumor.mpileup.gz_sample.cpn` + - files with raw copy number profiles for the tumor sample +- `.normal.mpileup.gz_control.cpn` + - files with raw copy number profiles for the control sample +- `.cpn>` + - file with GC-content profile + +
+ +### Microsatellite instability (MSI) + +[Microsatellite instability](https://en.wikipedia.org/wiki/Microsatellite_instability) is a genetic condition associated with deficiencies in the mismatch repair (MMR) system which causes a tendency to accumulate a high number of mutations (SNVs and indels). +An altered distribution of microsatellite length is associated with a missed replication slippage which would be corrected under normal MMR conditions. + +#### MSIsensor2 -For a Tumor/Normal pair: +[MSIsensor2](https://github.com/niu-lab/msisensor2) is a tool to detect the MSI status for tumor-only sequencing data, including Cell-Free DNA (cfDNA), Formalin-Fixed Paraffin-Embedded (FFPE) and other sample types. -**Output directory: `results/VariantCalling/[TUMOR_vs_NORMAL]/ControlFREEC`** +
+ +Output files for tumor only samples + +**Output directory: `{outdir}/variant_calling/msisensor2//`** + +- `` + - MSI score output, contains information about the number of somatic sites. +- `_dis` + - The normal and tumor length distribution for each microsatellite position. +- `_somatic` + - Somatic sites detected. +
+ +#### MSIsensorPro + +[MSIsensorPro](https://github.com/xjtu-omics/msisensor-pro) is a tool to detect the MSI status of a tumor scanning the length of the microsatellite regions. +It requires a normal sample for each tumour to differentiate the somatic and germline cases. For further reading see the [MSIsensor paper](https://www.ncbi.nlm.nih.gov/pubmed/24371154). + +
+Output files for tumor/normal paired samples + +**Output directory: `{outdir}/variant_calling/msisensor//`** + +- `` + - MSI score output, contains information about the number of somatic sites. +- `_dis` + - The normal and tumor length distribution for each microsatellite position. +- `_germline` + - Germline sites detected. +- `_somatic` + - Somatic sites detected. +
-- `[TUMORSAMPLE]_vs_[NORMALSAMPLE].config.txt` - - Configuration file used to run Control-FREEC -- `[TUMORSAMPLE].pileup.gz_CNVs` and `[TUMORSAMPLE].pileup.gz_normal_CNVs` - - file with coordinates of predicted copy number alterations -- `[TUMORSAMPLE].pileup.gz_ratio.txt` and `[TUMORSAMPLE].pileup.gz_normal_ratio.txt` - - file with ratios and predicted copy number alterations for each window -- `[TUMORSAMPLE].pileup.gz_BAF.txt` and `[NORMALSAMPLE].pileup.gz_BAF.txt` - - file with beta allele frequencies for each possibly heterozygous SNP position +## Post Variant Calling -For further reading and documentation see the [Control-FREEC manual](http://boevalab.com/FREEC/tutorial.html). +Optional steps to further filter or fine-tune variant calling results. There are two branches: `Varlociraptor` or `bcftools` (filtering, normalisation, and concatenation). -### MSI status +### Varlociraptor -[Microsatellite instability](https://en.wikipedia.org/wiki/Microsatellite_instability) is a genetic condition associated to deficiencies in the mismatch repair (MMR) system which causes a tendency to accumulate a high number of mutations (SNVs and indels). -An altered distribution of microsatellite length is associated to a missed replication slippage which would be corrected under normal MMR conditions. +Because Varlociraptor requires a set of candidate variants to evaluate, it can be run in combination with any variant caller. -#### MSIsensor +
+Output files for germline samples + +**Output directory: `{outdir}/variant_calling/varlociraptor/{sample}`** + +- `..germline.varlociraptor.vcf.gz` and `..germline.varlociraptor.vcf.gz.tbi` + - Final VCF with tabix index +- `/.scenario.varlociraptor.yaml` + - YAML file containing scenario for varlociraptor calling +- `/.alignment-properties.json` + - JSON file containing alignment properties for normal sample cram +
+ +
+Postprocessed VCF files for tumor-normal calling + +**Output directory: `{outdir}/variant_calling/varlociraptor/{tumorsample_vs_normalsample}`** + +- `_vs_...somatic.varlociraptor.vcf.gz` and `_vs_...somatic.varlociraptor.vcf.gz.tbi` + - Final VCF with tabix index +- `_vs_./_vs_..scenario.varlociraptor.yaml` + - YAML file containing scenario for varlociraptor calling (somatic calling) +- `_vs_./.alignment-properties.json` + - JSON file containing alignment properties for normal sample cram +- `_vs_./.tumor.alignment-properties.json` + - JSON file containing alignment properties for tumor sample cram +- `..merged.vcf.gz` + - VCF containing both somatic and germline variants +
-[MSIsensor](https://github.com/ding-lab/msisensor) is a tool to detect the MSI status of a tumor scanning the length of the microsatellite regions. -It requires a normal sample for each tumour to differentiate the somatic and germline cases. +
+Output files for tumor only samples + +**Output directory: `{outdir}/variant_calling/varlociraptor/{sample}`** + +- `..tumor_only.varlociraptor.vcf.gz` and `..tumor_only.varlociraptor.vcf.gz.tbi` + - Final VCF with tabix index +- `/.scenario.varlociraptor.yaml` + - YAML file containing scenario for varlociraptor calling +- `/.alignment-properties.json` + - JSON file containing alignment properties for tumor_only sample cram +
-For a Tumor/Normal pair: +### Filtering -**Output directory: `results/VariantCalling/[TUMORSAMPLE]_vs_[NORMALSAMPLE]/MSIsensor`** +VCFs from all variant callers can be filtered using `bcftools view`. Filtering is enabled by setting the `--filter_vcfs` parameter. By default, variants are filtered to include only those with `PASS` in the FILTER field. Custom filtering criteria can be specified using the `--bcftools_filter_criteria` parameter (see [bcftools view documentation](https://samtools.github.io/bcftools/bcftools.html#view) for filter syntax). + +
+Filtered VCF-files for normal and tumor samples -- `[TUMORSAMPLE]_vs_[NORMALSAMPLE]_msisensor` - - MSI score output, contains information about the number of somatic sites. -- `[TUMORSAMPLE]_vs_[NORMALSAMPLE]_msisensor_dis` - - The normal and tumor length distribution for each microsatellite position. -- `[TUMORSAMPLE]_vs_[NORMALSAMPLE]_msisensor_germline` - - Somatic sites detected. -- `[TUMORSAMPLE]_vs_[NORMALSAMPLE]_msisensor_somatic` - - Germline sites detected. +**Output directory: `{outdir}/variant_calling/filtered//`** -For further reading see the [MSIsensor paper](https://www.ncbi.nlm.nih.gov/pubmed/24371154). +- `..bcftools_filtered.vcf.gz` and `..bcftools_filtered.vcf.gz.tbi` + - VCF with tabix index containing filtered variants + +
+ +### Normalization + +All VCFs are normalized with `bcftools norm`. The field `SOURCE` is added to the VCF header to report the variant caller. + +
+Normalized VCF-files for normal and tumor samples + +**Output directory: `{outdir}/variant_calling/normalized//`** + +- `..norm.sorted.vcf.gz` and `..norm.sorted.vcf.gz.tbi` + - VCF with tabix index containing normalized variants + +
+ +### Consensus calling + +When `--snv_consensus_calling` is enabled, consensus VCFs are generated from a set of multiple VCF files by using `bcftools isec` to identify variants that are called by multiple tools. + +Strelka somatic calling produces separate VCF files for SNPs and indels that are concatenated before consensus calling. The workflow then groups VCF files by sample and performs consensus calling across all specified variant callers. + +By default, `bcftools isec` identifies variants present in at least a minimum number of input VCF files. This can be customized with `--consensus_min_count`. When annotation is enabled, both the consensus VCF and the individual caller VCFs are annotated. + +
+Consensus called VCF files for all samples + +**Output directory: `{outdir}/variant_calling/consensus//`** + +- `.consensus.vcf.gz` and `.consensus.vcf.gz.tbi` + - VCF with tabix index containing variants present in the consensus set of input variant callers. Built from the `sites.txt` file generated by `bcftools isec`. Each variant includes `CALLERS` (which callers found this variant) and `NCALLERS` (number of callers) INFO fields. +- `_consensus/` + - Directory containing intermediate `bcftools isec` output files: + - `0000.vcf.gz`, `0001.vcf.gz`, etc. - VCFs with variants unique to or shared between specific caller combinations + - `README.txt` - describes which numbered files correspond to which variant callers + - `sites.txt` - lists genomic positions and their presence/absence across all input VCF files + +
+ +### Concatenation + +Germline VCFs from `DeepVariant`, `FreeBayes`, `HaplotypeCaller`, `Haplotyper`, `Manta`, `bcftools mpileup`, `Strelka`, or `Tiddit` are concatenated with `bcftools concat`. The field `SOURCE` is added to the VCF header to report the variant caller. + +
+Concatenated VCF-files for normal samples + +**Output directory: `{outdir}/variant_calling/concat//`** + +- `.germline.vcf.gz` and `.germline.vcf.gz.tbi` + - VCF with tabix index containing concatenated germline variants + +
## Variant annotation -This directory contains results from the final annotation steps: two tools are used for annotation, [snpEff](http://snpeff.sourceforge.net/) and [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html). -Only a subset of the `VCF` files are annotated, and only variants that have a `PASS` filter. -Currently, `FreeBayes` results are not annotated as we are lacking a decent somatic filter. +This directory contains results from the final annotation steps: two tools are used for annotation, [snpEff](http://snpeff.sourceforge.net/) and [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html). Both results can also be combined by setting `--tools merge`. +All variants present in the called VCF files are annotated. For some variant callers this can mean that the variants are already filtered by `PASS`, for some this needs to be done during post-processing. ### snpEff [snpeff](http://snpeff.sourceforge.net/) is a genetic variant annotation and effect prediction toolbox. It annotates and predicts the effects of variants on genes (such as amino acid changes) using multiple databases for annotations. -The generated `VCF` header contains the software version and the used command line. - -For all samples: +The generated VCF header contains the software version and the used command line. For further reading and documentation see the [snpEff manual](http://snpeff.sourceforge.net/SnpEff_manual.html#outputSummary). -**Output directory: `results/Annotation/[SAMPLE]/snpEff`** +
+Output files for all samples -- `VariantCaller_Sample_snpEff.ann.vcf.gz` and `VariantCaller_Sample_snpEff.ann.vcf.gz.tbi` - - `VCF` with Tabix index +**Output directory: `{outdir}/annotation/{sample,tumorsample_vs_normalsample}`** -For further reading and documentation see the [snpEff manual](http://snpeff.sourceforge.net/SnpEff_manual.html#outputSummary) +- `{sample,tumorsample_vs_normalsample}._snpEff.ann.vcf.gz` and `{sample,tumorsample_vs_normalsample}._snpEff.ann.vcf.gz.tbi` + - VCF with tabix index +
### VEP [VEP (Variant Effect Predictor)](https://www.ensembl.org/info/docs/tools/vep/index.html), based on `Ensembl`, is a tool to determine the effects of all sorts of variants, including SNPs, indels, structural variants, CNVs. -The generated `VCF` header contains the software version, also the version numbers for additional databases like `Clinvar` or `dbSNP` used in the `VEP` line. -The format of the [consequence annotations](https://www.ensembl.org/info/genome/variation/prediction/predicted_data.html) is also in the `VCF` header describing the `INFO` field. +The generated VCF header contains the software version, also the version numbers for additional databases like [Clinvar](https://www.ncbi.nlm.nih.gov/clinvar/) or [dbSNP](https://www.ncbi.nlm.nih.gov/snp/) used in the [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html) line. +The format of the [consequence annotations](https://www.ensembl.org/info/genome/variation/prediction/predicted_data.html) is also in the VCF header describing the `INFO` field. +For further reading and documentation see the [VEP manual](https://www.ensembl.org/info/docs/tools/vep/index.html). + Currently, it contains: -- *Consequence*: impact of the variation, if there is any -- *Codons*: the codon change, i.e. cGt/cAt -- *Amino_acids*: change in amino acids, i.e. R/H if there is any -- *Gene*: ENSEMBL gene name -- *SYMBOL*: gene symbol -- *Feature*: actual transcript name -- *EXON*: affected exon -- *PolyPhen*: prediction based on [PolyPhen](http://genetics.bwh.harvard.edu/pph2/) -- *SIFT*: prediction by [SIFT](http://sift.bii.a-star.edu.sg/) -- *Protein_position*: Relative position of amino acid in protein -- *BIOTYPE*: Biotype of transcript or regulatory feature +- _Consequence_: impact of the variation, if there is any +- _Codons_: the codon change, i.e. cGt/cAt +- _Amino_acids_: change in amino acids, i.e. 
R/H if there is any +- _Gene_: ENSEMBL gene name +- _SYMBOL_: gene symbol +- _Feature_: actual transcript name +- _EXON_: affected exon +- _PolyPhen_: prediction based on [PolyPhen](http://genetics.bwh.harvard.edu/pph2/) +- _SIFT_: prediction by [SIFT](http://sift.bii.a-star.edu.sg/) +- _Protein_position_: Relative position of amino acid in protein +- _BIOTYPE_: Biotype of transcript or regulatory feature + +plus any additional fields selected via the plugins: [Condel](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#condel), [dbNSFP](https://sites.google.com/site/jpopgen/dbNSFP), [LOFTEE](https://github.com/konradjk/loftee), [Mastermind](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#mastermind), [Phenotypes](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#phenotypes), [SpliceAI](https://spliceailookup.broadinstitute.org/), [SpliceRegion](https://www.ensembl.info/2018/10/26/cool-stuff-the-vep-can-do-splice-site-variant-annotation/). + +
+Output files for all samples + +**Output directory: `{outdir}/annotation/{sample,tumorsample_vs_normalsample}`** -For all samples: +- `{sample,tumorsample_vs_normalsample}._VEP.ann.vcf.gz` and `{sample,tumorsample_vs_normalsample}._VEP.ann.vcf.gz.tbi` + - VCF with tabix index -**Output directory: `results/Annotation/[SAMPLE]/VEP`** +
-- `VariantCaller_Sample_VEP.ann.vcf.gz` and `VariantCaller_Sample_VEP.ann.vcf.gz.tbi` - - `VCF` with Tabix index +### BCFtools annotate -For further reading and documentation see the [VEP manual](https://www.ensembl.org/info/docs/tools/vep/index.html) +[BCFtools annotate](https://samtools.github.io/bcftools/bcftools.html#annotate) is used to add annotations to VCF files. The annotations are added to the INFO column of the VCF file, and the VCF header is updated to describe the new annotations. For further reading and documentation see the [BCFtools annotate manual](https://samtools.github.io/bcftools/bcftools.html#annotate). -## QC and reporting +
+Output files for all samples -### QC +- `{sample,tumorsample_vs_normalsample}._bcf.ann.vcf.gz` and `{sample,tumorsample_vs_normalsample}._bcf.ann.vcf.gz.tbi` + - VCF with tabix index + +
+ +## Quality control and reporting + +### Quality control #### FastQC [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). -For all samples: +The plots display: + +- Sequence counts for each sample. +- Sequence Quality Histograms: The mean quality value across each base position in the read. +- Per Sequence Quality Scores: The number of reads with average quality scores. Shows if a subset of reads has poor quality. +- Per Base Sequence Content: The proportion of each base position for which each of the four normal DNA bases has been called. +- Per Sequence GC Content: The average GC content of reads. A normal random library typically has a roughly normal distribution of GC content. +- Per Base N Content: The percentage of base calls at each position for which an N was called. +- Sequence Length Distribution. +- Sequence Duplication Levels: The relative level of duplication found for each sequence. +- Overrepresented sequences: The total amount of overrepresented sequences found in each library. +- Adapter Content: The cumulative percentage count of the proportion of your library which has seen each of the adapter sequences at each position. + +
+Output files for all samples + +**Output directory: `{outdir}/reports/fastqc/`** + +- `_fastqc.html` and `_fastqc.html` + - [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) report containing quality metrics for your untrimmed raw FastQ files +- `_fastqc.zip` and `_fastqc.zip` + - Zip archive containing the [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) report, tab-delimited data file and plot images -**Output directory: `results/Reports/[SAMPLE]/fastqc`** +:::note +The FastQC plots displayed in the MultiQC report show _untrimmed_ reads. +They may contain adapter sequence and potentially regions with low quality. +::: -- `sample_R1_XXX_fastqc.html` and `sample_R2_XXX_fastqc.html` - - `FastQC` report containing quality metrics for your untrimmed raw `FASTQ` files -- `sample_R1_XXX_fastqc.zip` and `sample_R2_XXX_fastqc.zip` - - Zip archive containing the FastQC report, tab-delimited data file and plot images +
-> **NB:** The `FastQC` plots displayed in the `MultiQC` report shows _untrimmed_ reads. -> They may contain adapter sequence and potentially regions with low quality. +#### FastP -- `fastqc/` - - `*_fastqc.html`: FastQC report containing quality metrics. - - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +[FastP](https://github.com/OpenGene/fastp) is a tool designed to provide all-in-one preprocessing for FastQ files and is used for trimming and splitting. The tool then determines QC metrics for the processed reads. + +
+Output files for all samples -#### bamQC +**Output directory: `{outdir}/reports/fastp/`** -[Qualimap bamqc](http://qualimap.bioinfo.cipf.es/) reports information for the evaluation of the quality of the provided alignment data. +- `_fastp.html` + - report in HTML format +- `_fastp.json` + - report in JSON format +- `_fastp.log` + - FastP log file + +
+ +#### Mosdepth + +[Mosdepth](https://github.com/brentp/mosdepth) reports information for the evaluation of the quality of the provided alignment data. In short, the basic statistics of the alignment (number of reads, coverage, GC-content, etc.) are summarized and a number of useful graphs are produced. +For further reading and documentation see the [Mosdepth documentation](https://github.com/brentp/mosdepth). -Plot will show: +Plots will show: -- Stats by non-reference allele frequency, depth distribution, stats by quality and per-sample counts, singleton stats, etc. +- cumulative coverage distribution +- absolute coverage distribution +- average coverage per contig/chromosome + +
+Output files for all samples -For all samples: +**Output directory: `{outdir}/reports/mosdepth/`** -**Output directory: `results/Reports/[SAMPLE]/bamQC`** +- `.{sorted,md,recal}.mosdepth.global.dist.txt` + - file used by [MultiQC](https://multiqc.info/), if `.region` file does not exist +- `.{sorted,md,recal}.mosdepth.region.dist.txt` + - file used by [MultiQC](https://multiqc.info/) +- `.{sorted,md,recal}.mosdepth.summary.txt` + - A summary of mean depths per chromosome and within specified regions per chromosome. +- `.{sorted,md,recal}.{per-base,regions}.bed.gz` + - per-base depth for targeted data, per-window (500bp) depth of WGS +- `.{sorted,md,recal}.regions.bed.gz.csi` + - CSI index for per-base depth for targeted data, per-window (500bp) depth of WGS +
-- `VariantCaller_[SAMPLE].bcf.tools.stats.out` - - Raw statistics used by `MultiQC` +#### NGSCheckMate -For further reading and documentation see the [Qualimap bamqc manual](http://qualimap.bioinfo.cipf.es/doc_html/analysis.html#id7) +[NGSCheckMate](https://github.com/parklab/NGSCheckMate) is a tool for determining whether samples come from the same genetic individual, using a set of commonly heterozygous SNPs. This enables the detection of sample mislabelling events. The output includes a text file indicating whether samples have matched or not according to the algorithm, as well as a dendrogram visualising these results. + +
+Output files for all samples + +**Output directory: `{outdir}/reports/ngscheckmate/`** + +- `ngscheckmate_all.txt` + - Tab delimited text file listing all the comparisons made, whether they were considered as a match, with the correlation and a normalised depth. +- `ngscheckmate_matched.txt` + - Tab delimited text file listing only the comparisons that were considered to match, with the correlation and a normalised depth. +- `ngscheckmate_output_corr_matrix.txt` + - Tab delimited text file containing a matrix of all correlations for all comparisons made. +- `vcfs/.vcf.gz` + - Set of vcf files for each sample. Contains calls for the set of SNP positions used to calculate sample relatedness. +
#### GATK MarkDuplicates reports @@ -623,54 +1259,76 @@ More information in the [GATK MarkDuplicates section](#gatk-markduplicates) Duplicates can arise during sample preparation _e.g._ library construction using PCR. Duplicate reads can also result from a single amplification cluster, incorrectly detected as multiple clusters by the optical sensor of the sequencing instrument. -These duplication artifacts are referred to as optical duplicates. +These duplication artifacts are referred to as optical duplicates. If [GATK MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/5358880192027-MarkDuplicates-Picard-) is used, the metrics file generated by the tool is used; if [`GATK MarkDuplicatesSpark`](https://gatk.broadinstitute.org/hc/en-us/articles/5358833264411-MarkDuplicatesSpark) is used, the report is generated by [GATK4 EstimateLibraryComplexity](https://gatk.broadinstitute.org/hc/en-us/articles/5358838684187-EstimateLibraryComplexity-Picard-) on the mapped BAM files. +For further reading and documentation see the [MarkDuplicates manual](https://software.broadinstitute.org/gatk/documentation/tooldocs/4.1.2.0/picard_sam_markduplicates_MarkDuplicates.php). + +The plot will show: -For all samples: +- duplication statistics -**Output directory: `results/Reports/[SAMPLE]/MarkDuplicates`** +
+Output files for all samples -- `[SAMPLE].bam.metrics` - - Raw statistics used by `MultiQC` +**Output directory: `{outdir}/reports/markduplicates/`** -For further reading and documentation see the [MarkDuplicates manual](https://software.broadinstitute.org/gatk/documentation/tooldocs/4.1.2.0/picard_sam_markduplicates_MarkDuplicates.php). +- `.md.cram.metrics` + - file used by [MultiQC](https://multiqc.info/) +
+ +#### Sentieon Dedup reports + +Sentieon's DNAseq subroutine Dedup produces a metrics report much like the one produced by GATK's MarkDuplicates. The Dedup metrics are imported into MultiQC as custom content and displayed in a table. + +
+Output files for all samples + +**Output directory: `{outdir}/reports/sentieon_dedup/`** + +- `.dedup.cram.metrics` + - file used by [MultiQC](https://multiqc.info/). +
#### samtools stats -[samtools stats](https://www.htslib.org/doc/samtools.html) collects statistics from `BAM` files and outputs in a text format. +[samtools stats](https://www.htslib.org/doc/samtools.html) collects statistics from CRAM files and outputs in a text format. +For further reading and documentation see the [`samtools` manual](https://www.htslib.org/doc/samtools.html#COMMANDS_AND_OPTIONS). -Plots will show: +The plots will show: - Alignment metrics. -For all samples: +
+Output files for all samples -**Output directory: `results/Reports/[SAMPLE]/SamToolsStats`** +**Output directory: `{outdir}/reports/samtools/`** -- `[SAMPLE].bam.samtools.stats.out` - - Raw statistics used by `MultiQC` +- `.{sorted,md,recal}.samtools.stats.out` + - Raw statistics used by `MultiQC` -For further reading and documentation see the [`samtools` manual](https://www.htslib.org/doc/samtools.html#COMMANDS_AND_OPTIONS) +
#### bcftools stats -[bcftools](https://samtools.github.io/bcftools/) is a program for variant calling and manipulating `VCF` files. +[bcftools stats](https://samtools.github.io/bcftools/bcftools.html#stats) produces a statistics text file which is suitable for machine processing and can be plotted using plot-vcfstats. +For further reading and documentation see the [bcftools stats manual](https://samtools.github.io/bcftools/bcftools.html#stats). -Plot will show: +Plots will show: - Stats by non-reference allele frequency, depth distribution, stats by quality and per-sample counts, singleton stats, etc. +- Note: When using [Strelka](https://github.com/Illumina/strelka), there will be no depth distribution plot, as Strelka does not report the INFO/DP field -For all samples: - -**Output directory: `results/Reports/[SAMPLE]/BCFToolsStats`** +
+Output files for all samples -- `VariantCaller_[SAMPLE].bcf.tools.stats.out` - - Raw statistics used by `MultiQC` +**Output directory: `{outdir}/reports/bcftools/`** -For further reading and documentation see the [bcftools stats manual](https://samtools.github.io/bcftools/bcftools.html#stats) +- `..bcftools_stats.txt` + - Raw statistics used by `MultiQC` +
#### VCFtools -[VCFtools](https://vcftools.github.io/) is a program package designed for working with `VCF` files. +[VCFtools](https://vcftools.github.io/) is a program package designed for working with VCF files. For further reading and documentation see the [VCFtools manual](https://vcftools.github.io/man_latest.html#OUTPUT%20OPTIONS). Plots will show: @@ -678,56 +1336,56 @@ Plots will show: - the transition to transversion ratio as a function of alternative allele count (using only bi-allelic SNPs). - the transition to transversion ratio as a function of SNP quality threshold (using only bi-allelic SNPs). -For all samples: - -**Output directory: `results/Reports/[SAMPLE]/VCFTools`** +
+Output files for all samples -- `VariantCaller_[SAMPLE].FILTER.summary` - - Raw statistics used by `MultiQC` -- `VariantCaller_[SAMPLE].TsTv.count` - - Raw statistics used by `MultiQC` -- `VariantCaller_[SAMPLE].TsTv.qual` - - Raw statistics used by `MultiQC` +**Output directory: `{outdir}/reports/vcftools/`** -For further reading and documentation see the [VCFtools manual](https://vcftools.github.io/man_latest.html#OUTPUT%20OPTIONS) +- `..FILTER.summary` + - Raw statistics used by `MultiQC` with a summary of the number of SNPs and Ts/Tv ratio for each FILTER category +- `..TsTv.count` + - Raw statistics used by `MultiQC` with the Transition / Transversion ratio as a function of alternative allele count. Only uses bi-allelic SNPs. +- `..TsTv.qual` + - Raw statistics used by `MultiQC` with Transition / Transversion ratio as a function of SNP quality threshold. Only uses bi-allelic SNPs. +
#### snpEff reports [snpeff](http://snpeff.sourceforge.net/) is a genetic variant annotation and effect prediction toolbox. -It annotates and predicts the effects of variants on genes (such as amino acid changes) using multiple databases for annotations. +It annotates and predicts the effects of variants on genes (such as amino acid changes) using multiple databases for annotations. For further reading and documentation see the [snpEff manual](http://snpeff.sourceforge.net/SnpEff_manual.html#outputSummary). -Plots will shows : +The plots will show: - locations of detected variants in the genome and the number of variants for each location. - the putative impact of detected variants and the number of variants for each impact. - the effect of variants at protein level and the number of variants for each effect type. - the quantity as function of the variant quality score. -For all samples: - -**Output directory: `results/Reports/[SAMPLE]/snpEff`** +
+Output files for all samples -- `VariantCaller_Sample_snpEff.csv` - - Raw statistics used by `MultiQC` -- `VariantCaller_Sample_snpEff.html` - - Statistics to be visualised with a web browser -- `VariantCaller_Sample_snpEff.genes.txt` - - TXT (tab separated) summary counts for variants affecting each transcript and gene +**Output directory: `{outdir}/reports/SnpEff/{sample,tumorsample_vs_normalsample}//`** -For further reading and documentation see the [snpEff manual](http://snpeff.sourceforge.net/SnpEff_manual.html#outputSummary) +- `._snpEff.csv` + - Raw statistics used by [MultiQC](http://multiqc.info) +- `._snpEff.html` + - Statistics to be visualised with a web browser +- `._snpEff.genes.txt` + - TXT (tab separated) summary counts for variants affecting each transcript and gene +
#### VEP reports -[VEP (Variant Effect Predictor)](https://www.ensembl.org/info/docs/tools/vep/index.html), based on `Ensembl`, is a tools to determine the effects of all sorts of variants, including SNPs, indels, structural variants, CNVs. +[VEP (Variant Effect Predictor)](https://www.ensembl.org/info/docs/tools/vep/index.html), based on `Ensembl`, is a tool to determine the effects of all sorts of variants, including SNPs, indels, structural variants, CNVs. For further reading and documentation see the [VEP manual](https://www.ensembl.org/info/docs/tools/vep/index.html) -For all samples: - -**Output directory: `results/Reports/[SAMPLE]/VEP`** +
+Output files for all samples -- `VariantCaller_Sample_VEP.summary.html` - - Summary of the VEP run to be visualised with a web browser +**Output directory: `{outdir}/reports/EnsemblVEP/{sample,tumorsample_vs_normalsample}//`** -For further reading and documentation see the [VEP manual](https://www.ensembl.org/info/docs/tools/vep/index.html) +- `._VEP.summary.html` + - Summary of the VEP run to be visualised with a web browser +
### Reporting @@ -735,27 +1393,16 @@ For further reading and documentation see the [VEP manual](https://www.ensembl.o [MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarizing all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. - -The pipeline has special steps which also allow the software versions to be reported in the `MultiQC` output for future traceability. +Results generated by MultiQC collect pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see [https://multiqc.info](https://multiqc.info).
Output files - `multiqc/` - - `multiqc_report.html` - - Standalone HTML file that can be viewed in your web browser - - `multiqc_data/` - - Directory containing parsed statistics from the different tools used in the pipeline - - `multiqc_plots/` - - Directory containing static images from the report in various formats - -For more information about how to use `MultiQC` reports, see [https://multiqc.info](https://multiqc.info). - -
- -[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. - -Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + ### Pipeline information @@ -763,10 +1410,43 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ Output files - `pipeline_info/` - - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Reports generated by Nextflow: `execution_report_.html`, `execution_timeline_.html`, `execution_trace_.txt`, `pipeline_dag_.dot`/`pipeline_dag_.svg` and `manifest_.bco.json`. + - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline. + - Parameters used by the pipeline run: `params_.json`. 
[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. + +## Reference files + +Contains reference folders generated by the pipeline. These files are only published, if `--save_reference` is set. + +
+Output files + +- `bwa/` + - Index corresponding to the [BWA](https://github.com/lh3/bwa) aligner +- `bwamem2/` + - Index corresponding to the [BWA-mem2](https://github.com/bwa-mem2/bwa-mem2) aligner +- `cnvkit/` + - Reference files generated by [CNVKit](https://cnvkit.readthedocs.io/en/stable/) +- `dragmap/` + - Index corresponding to the [DragMap](https://github.com/Illumina/dragmap) aligner +- `dbsnp/` + - Tabix index generated by [Tabix](http://www.htslib.org/doc/tabix.html) from the given dbsnp file +- `dict/` + - Sequence dictionary generated by [GATK4 CreateSequenceDictionary](https://gatk.broadinstitute.org/hc/en-us/articles/5358872471963-CreateSequenceDictionary-Picard-) from the given fasta +- `fai/` + - Fasta index generated with [samtools faidx](http://www.htslib.org/doc/samtools-faidx.html) from the given fasta +- `germline_resource/` + - Tabix index generated by [Tabix](http://www.htslib.org/doc/tabix.html) from the given germline resource file +- `intervals/` + - Bed files in various stages: .bed, .bed.gz, .bed per chromosome, .bed.gz per chromosome +- `known_indels/` + - Tabix index generated by [Tabix](http://www.htslib.org/doc/tabix.html) from the given known indels file +- `msi/` + - [MSIsensorPro](https://github.com/xjtu-omics/msisensor-pro) scan of the reference genome to get microsatellites information +- `pon/` + - Tabix index generated by [Tabix](http://www.htslib.org/doc/tabix.html) from the given panel-of-normals file +
diff --git a/docs/posters/EMBO_2022_FHanssen.pdf b/docs/posters/EMBO_2022_FHanssen.pdf new file mode 100644 index 0000000000..068e837c29 Binary files /dev/null and b/docs/posters/EMBO_2022_FHanssen.pdf differ diff --git a/docs/posters/ESHG_2017_Mgarcia.svg b/docs/posters/ESHG_2017_Mgarcia.svg index d8be275f2f..6de0ebca57 100644 --- a/docs/posters/ESHG_2017_Mgarcia.svg +++ b/docs/posters/ESHG_2017_Mgarcia.svg @@ -11014,4 +11014,4 @@ clip-path="url(#clipPath2)" d="m 241,499.2656 5,-12 -5,3 -5,-3 z" inkscape:connector-curvature="0" - style="stroke:none;stroke-width:1" /> \ No newline at end of file + style="stroke:none;stroke-width:1" /> diff --git a/docs/posters/ISMB_ECCB_2023_FHanssen.pdf b/docs/posters/ISMB_ECCB_2023_FHanssen.pdf new file mode 100644 index 0000000000..e99c617057 Binary files /dev/null and b/docs/posters/ISMB_ECCB_2023_FHanssen.pdf differ diff --git a/docs/posters/NextflowSummit_2022_FHanssen.pdf b/docs/posters/NextflowSummit_2022_FHanssen.pdf new file mode 100644 index 0000000000..aa3f91d072 Binary files /dev/null and b/docs/posters/NextflowSummit_2022_FHanssen.pdf differ diff --git a/docs/posters/PMC_2018_Mgarcia.svg b/docs/posters/PMC_2018_Mgarcia.svg index 9201e86332..6a3c6e1db0 100644 --- a/docs/posters/PMC_2018_Mgarcia.svg +++ b/docs/posters/PMC_2018_Mgarcia.svg @@ -12092,4 +12092,4 @@ inkscape:connector-curvature="0" id="path5306-6" style="fill:none;stroke:#000000;stroke-width:3.49964452;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:10;stroke-dasharray:none;stroke-opacity:1" - d="m 736.2502,444.00009 c 0,-26.6476 -21.6022,-48.25019 -48.25018,-48.25019 -26.6476,0 -48.25018,21.60259 -48.25018,48.25019 0,26.64759 21.60258,48.25017 48.25018,48.25017 26.64798,0 48.25018,-21.60258 48.25018,-48.25017 z" /> \ No newline at end of file + d="m 736.2502,444.00009 c 0,-26.6476 -21.6022,-48.25019 -48.25018,-48.25019 -26.6476,0 -48.25018,21.60259 -48.25018,48.25019 0,26.64759 21.60258,48.25017 48.25018,48.25017 26.64798,0 48.25018,-21.60258 
48.25018,-48.25017 z" /> diff --git a/docs/posters/QBiC_Symposium_2022_FHanssen.pdf b/docs/posters/QBiC_Symposium_2022_FHanssen.pdf new file mode 100644 index 0000000000..c981734fac Binary files /dev/null and b/docs/posters/QBiC_Symposium_2022_FHanssen.pdf differ diff --git a/docs/usage.md b/docs/usage.md index 643db4b71a..44d536c97f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -4,228 +4,470 @@ > _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ -## Introduction +# Introduction -Sarek is a workflow designed to detect variants on whole genome or targeted sequencing data. -Initially designed for Human, and Mouse, it can work on any species with a reference genome. -Sarek can also handle tumour / normal pairs and could include additional relapses. +Sarek is a workflow designed to detect germline and somatic variants on whole genome, whole exome, or targeted sequencing data. -## Samplesheet input +Initially designed for human and mouse, it can work on any species if a reference genome is available. +Sarek is designed to handle single samples, such as single-normal or single-tumor samples, and tumor-normal pairs including additional relapses. -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +# Running the pipeline -```console ---input '[path to samplesheet file]' +## Quickstart + +The typical command for running the pipeline is as follows: + +```bash +nextflow run nf-core/sarek -r -profile --input ./samplesheet.csv --outdir ./results --genome GATK.GRCh38 --tools ``` -### Multiple runs of the same sample +`-r ` is optional but strongly recommended for reproducibility and should match the latest version. 
+ +`-profile ` is mandatory and should reflect either your own institutional profile or any pipeline profile specified in the [profile section](#-profile). + +This documentation implies that any `nextflow run nf-core/sarek` command is run with the appropriate `-r` and `-profile` commands. -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: +This will launch the pipeline and perform variant calling with the tools specified in `--tools`, see the [parameter section](https://nf-co.re/sarek/latest/parameters#tools) for details on variant calling tools. +In the above example the pipeline runs with the `docker` configuration profile. See below for more information about profiles. -```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz +Note that the pipeline will create the following files in your working directory: + +```bash +work # Directory containing the nextflow working files + # Finished results in specified location (defined with --outdir) +.nextflow.log # Log file from Nextflow +# Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` -### Full samplesheet +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. + +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. + +> [!WARNING] +> Do not use `-c ` to specify parameters as this will result in errors. 
Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. +The above pipeline run specified with a params file in yaml format: -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. +```bash +nextflow run nf-core/sarek -params-file params.yaml +``` + +with: -```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +```yaml title="params.yaml" +input: './samplesheet.csv' +outdir: './results/' +genome: 'GATK.GRCh38' +<...> ``` -| Column | Description | -|----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. 
File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). + +## Input: Sample sheet configurations + +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use the parameter `--input` to specify its location. It has to be a comma-separated file with at least 3 columns, and a header row as shown in the examples below. + +It is recommended to use the absolute path of the files, but a relative path should also work. + +If necessary, a tumor sample can be associated to a normal sample as a pair, if specified with the same `patient` ID, a different `sample`, and the respective `status`. +An additional tumor sample (such as a relapse for example), can be added if specified with the same `patient` ID, a different `sample`, and the `status` value `1`. + +Sarek will output results in a different directory for _each sample_. +If multiple samples IDs are specified in the CSV file, Sarek will consider all files to be from different samples. + +Output from Variant Calling and/or Annotation will be in a specific directory for each sample and tool configuration (or normal/tumor pair if applicable). + +### Overview: Samplesheet Columns + +| Column | Description | +| ---------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `patient` | **Custom patient ID**; designates the patient/subject; must be unique for each patient, but one patient can have multiple samples (e.g. normal and tumor).
_Required_ | +| `sex` | **Sex chromosomes of the patient**; i.e. XX, XY..., only used for Copy-Number Variation analysis in a tumor/pair
_Optional, Default: `NA`_ | +| `status` | **Normal/tumor status of sample**; can be `0` (normal) or `1` (tumor).
_Optional, Default: `0`_ | +| `sample` | **Custom sample ID** for each tumor and normal sample; more than one tumor sample for each subject is possible, i.e. a tumor and a relapse; samples can have multiple lanes for which the _same_ ID must be used to merge them later (see also `lane`). Sample IDs must be unique for unique biological samples
_Required_ | +| `lane` | Lane ID, used when the `sample` is multiplexed on several lanes. Must be unique for each lane in the same sample (but does not need to be the original lane name), and must contain at least one character
_Required for `--step mapping`_ | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension `.fastq.gz` or `.fq.gz`. | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension `.fastq.gz` or `.fq.gz`. | +| `spring_1` | Full path to spring-compressed, gzipped FastQ file for read 1 or for reads 1 and 2. The Fastq file has to be first gzipped, then spring-compressed, and it must have the extension `.fastq.gz.spring` or `.fq.gz.spring`. | +| `spring_2` | Full path to spring-compressed, gzipped FastQ file for read 2. The Fastq file has to be first gzipped, then spring-compressed, and it must have the extension `.fastq.gz.spring` or `.fq.gz.spring`. | +| `bam` | Full path to (u)BAM file | +| `bai` | Full path to BAM index file | +| `cram` | Full path to CRAM file | +| `crai` | Full path to CRAM index file | +| `table` | Full path to recalibration table file | +| `vcf` | Full path to vcf file | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. -## Running the pipeline +### Start with mapping (`--step mapping` [default]) -The typical command for running the pipeline is as follows: +This step can be started either from FastQ files (gzip-compressed or gzip+spring-compressed) or (u)BAMs. The CSV must contain at least the columns `patient`, `sample`, `lane`, and `fastq_1/fastq_2`, `spring_1`, `spring_1/spring_2` or `bam`. -```console -nextflow run nf-core/sarek --input samplesheet.csv --genome GRCh38 -profile docker +#### Examples + +Minimal config file: + +```bash +patient,sample,lane,fastq_1,fastq_2 +patient1,test_sample,lane_1,test_1.fastq.gz,test_2.fastq.gz ``` -This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. 
+```bash +patient,sample,lane,spring_1 +patient1,test_sample,lane_1,test_R1_and_R2.fastq.gz.spring +``` -Note that the pipeline will create the following files in your working directory: +```bash +patient,sample,lane,spring_1,spring_2 +patient1,test_sample,lane_1,test_R1.fastq.gz.spring,test_R2.fastq.gz.spring +``` -```console -work # Directory containing the nextflow working files -results # Finished results (configurable, see below) -.nextflow_log # Log file from Nextflow -# Other nextflow hidden files, eg. history of pipeline runs and old logs. +```bash +patient,sample,lane,bam +patient1,test_sample,lane_1,test.bam ``` -### Updating the pipeline +In this example, the sample is multiplexed over three lanes: -When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: +```bash +patient,sample,lane,fastq_1,fastq_2 +patient1,test_sample,lane_1,test_L001_1.fastq.gz,test_L001_2.fastq.gz +patient1,test_sample,lane_2,test_L002_1.fastq.gz,test_L002_2.fastq.gz +patient1,test_sample,lane_3,test_L003_1.fastq.gz,test_L003_2.fastq.gz +``` -```console -nextflow pull nf-core/sarek +```bash +patient,sample,lane,bam +patient1,test_sample,1,test_L001.bam +patient1,test_sample,2,test_L002.bam +patient1,test_sample,3,test_L003.bam ``` -### Reproducibility +#### Full samplesheet -It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. 
+In this example, all possible columns are used. There are three lanes for the normal sample, two for the tumor sample, and one for the relapse sample, including the `sex` and `status` information per patient: -First, go to the [nf-core/sarek releases page](https://github.com/nf-core/sarek/releases) and find the latest version number - numeric only (eg. `2.6.1`). -Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 2.6.1`. +```bash +patient,sex,status,sample,lane,fastq_1,fastq_2 +patient1,XX,0,normal_sample,lane_1,test_L001_1.fastq.gz,test_L001_2.fastq.gz +patient1,XX,0,normal_sample,lane_2,test_L002_1.fastq.gz,test_L002_2.fastq.gz +patient1,XX,0,normal_sample,lane_3,test_L003_1.fastq.gz,test_L003_2.fastq.gz +patient1,XX,1,tumor_sample,lane_1,test2_L001_1.fastq.gz,test2_L001_2.fastq.gz +patient1,XX,1,tumor_sample,lane_2,test2_L002_1.fastq.gz,test2_L002_2.fastq.gz +patient1,XX,1,relapse_sample,lane_1,test3_L001_1.fastq.gz,test3_L001_2.fastq.gz +``` -This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. +```bash +patient,sex,status,sample,lane,bam +patient1,XX,0,normal_sample,lane_1,test_L001.bam +patient1,XX,0,normal_sample,lane_2,test_L002.bam +patient1,XX,0,normal_sample,lane_3,test_L003.bam +patient1,XX,1,tumor_sample,lane_1,test2_L001.bam +patient1,XX,1,tumor_sample,lane_2,test2_L002.bam +patient1,XX,1,relapse_sample,lane_1,test3_L001.bam +``` -## Core Nextflow arguments +#### Using GPU accelerated alignment (`--aligner parabricks`) -> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +> [!NOTE] +> This is an experimental addition to the pipeline which is not at feature parity with the GATK implementation. -### `-profile` +To use the GPU-based `parabricks/fq2bam` as an alternative to the CPU-based GATK implementation, add `--aligner parabricks -profile ,gpu` to your run command. 
The parabricks implementation does not support the use of this pipeline with `-profile conda`. -Use this parameter to choose a configuration profile. -Profiles can give configuration presets for different compute environments. +At the moment the implementation supports running the complete fq2bam module which does bwa-mem based alignment, coordinate sorting, duplicate marking and base quality score recalibration. We are working on making these individual components skippable (comparable to the GATK implementation); see [Issue #1853](https://github.com/nf-core/sarek/issues/1853) for more details on the ongoing work. -Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. When using Biocontainers, most of these software packaging methods pull Docker containers from quay.io e.g [FastQC](https://quay.io/repository/biocontainers/fastqc) except for Singularity which directly downloads Singularity images via https hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/) and Conda which downloads and installs software locally from [Bioconda](https://bioconda.github.io/). +The Sarek-generated CSV file is stored under `results/csv/mapped.csv` if `--save_mapped` is set. -> We highly recommend the use of `Docker` or `Singularity` containers for full pipeline reproducibility, however when this is not possible, `Conda` is also supported. +**Hints for custom configuration based on your local hardware setup:** -The pipeline also dynamically loads configurations from [github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. -For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). 
+You can supply more command-line arguments to the `fq2bam` process depending on your local setup. The performance depends on the type of GPU and the amount of CPU RAM that parabricks is able to utilize. The `--read-group-*` arguments are used by mutect2 and need to be added to your local config. Lowering `--bwa-nstreams` from 4 (standard) to 2 can help with memory issues, as can the `--gpuwrite` and `--gpusort` options. For a more in-depth description of the available arguments please read the [parabricks fq2bam documentation](https://docs.nvidia.com/clara/parabricks/latest/documentation/tooldocs/man_fq2bam.html). -Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! -They are loaded in sequence, so later profiles can overwrite earlier profiles. +You will need to add the accelerator directive to your local config in order to make use of the GPUs correctly if you do not use a cluster setup with a dedicated GPU queue. The accelerator directive is added automatically for the executors `'awsbatch','google-batch','hq','k8s'`. See [Nextflow executors](https://www.nextflow.io/docs/latest/executor.html) for more information. -If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. -This is _not_ recommended. - -* `docker` - * A generic configuration profile to be used with [Docker](https://docker.com/) -* `singularity` - * A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) -* `podman` - * A generic configuration profile to be used with [Podman](https://podman.io/) -* `shifter` - * A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) -* `charliecloud` - * A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) -* `conda` - * A generic configuration profile to be used with [Conda](https://conda.io/docs/). 
Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. -* `test` - * A profile with a complete configuration for automated testing - * Includes links to test data so needs no other parameters - -### `-resume` - -Specify this when restarting a pipeline. Nextflow will used cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. +If you need to adapt parabricks to your hardware, please copy and adapt the `custom-parabricks.config` provided below. More info on custom configs can be found [here](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files). -You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. +```groovy title="custom-parabricks.config" +process { + withName: 'PARABRICKS_FQ2BAM' { + // Remove an executor if you do not want it to set the accelerator directive or change the number + accelerator = { task.executor in ['awsbatch','google-batch','hq','k8s'] ? 4 : null } + ext.args = { [ + // Using specific read group tags for mutect compatibility (keep if using mutect) + "--read-group-id-prefix ${meta.sample_lane_id}", + "--read-group-sm ${meta.patient}_${meta.sample}", + "--read-group-lb ${meta.sample}", + "--read-group-pl ${params.seq_platform}", + // Using -B 3 for tumor samples (keep for bwamem compatibility) + meta.status == 1 ? "--bwa-options='-K 100000000 -Y -B 3'" : "--bwa-options='-K 100000000 -Y'", + // You can change the flags below + "--gpuwrite", + "--gpusort", + "--bwa-nstreams 2", + ].join(' ').trim() } + } +} +``` -### `-c` +### Start with duplicate marking (`--step markduplicates`) -Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. 
+#### Duplicate Marking -## Custom configuration +For starting from duplicate marking, the CSV file must contain at least the columns `patient`, `sample`, `bam`, `bai` or `patient`, `sample`, `cram`, `crai` -### Resource requests +> **NB:** When using [GATK4 MarkduplicatesSpark](https://gatk.broadinstitute.org/hc/en-us/articles/5358833264411-MarkDuplicatesSpark) reads should be name-sorted for efficient execution -Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. 
+Example: -For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue: +```bash +patient,sample,bam,bai +patient1,test_sample,test_mapped.bam,test_mapped.bam.bai +``` -```console -[62/149eb0] NOTE: Process `RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) -Error executing process > 'RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' +```bash +patient,sample,cram,crai +patient1,test_sample,test_mapped.cram,test_mapped.cram.crai +``` -Caused by: - Process `RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) +The Sarek-generated CSV file is stored under `results/csv/mapped.csv` if in a previous run `--save_mapped` was set and will automatically be used as an input when specifying the parameter `--step markduplicates`. Otherwise this file will need to be manually generated. -Command executed: - STAR \ - --genomeDir star \ - --readFilesIn WT_REP1_trimmed.fq.gz \ - --runThreadN 2 \ - --outFileNamePrefix WT_REP1. \ - +#### Full samplesheet -Command exit status: - 137 +In this example, all possible columns are used including the `sex` and `status` information per patient: -Command output: - (empty) +```bash +patient,sex,status,sample,bam,bai +patient1,XX,0,test_sample,test_mapped.bam,test_mapped.bam.bai +patient1,XX,1,tumor_sample,test2_mapped.bam,test2_mapped.bam.bai +patient1,XX,1,relapse_sample,test3_mapped.bam,test3_mapped.bam.bai +``` -Command error: - .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. 
-Work dir: - /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb +```bash +patient,sex,status,sample,cram,crai +patient1,XX,0,normal_sample,test_mapped.cram,test_mapped.cram.crai +patient1,XX,1,tumor_sample,test2_mapped.cram,test2_mapped.cram.crai +patient1,XX,1,relapse_sample,test3_mapped.cram,test3_mapped.cram.crai +``` + +### Start with preparing the recalibration tables (`--step prepare_recalibration`) + +For starting directly from preparing the recalibration tables, the CSV file must contain at least the columns `patient`, `sample`, `bam`, `bai` or `patient`, `sample`, `cram`, `crai`. + +Example: -Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` +```bash +patient,sample,bam,bai +patient1,test_sample,test_md.bam,test_md.bam.bai ``` -To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so based on the search results the file we want is `modules/nf-core/software/star/align/main.nf`. If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. 
The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. Providing you haven't set any other standard nf-core parameters to __cap__ the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least 72GB of memory, in this case increased to 100GB. The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections. +```bash +patient,sample,cram,crai +patient1,test_sample,test_md.cram,test_md.cram.crai +``` -```nextflow -process { - withName: STAR_ALIGN { - memory = 100.GB - } -} +The Sarek-generated CSV file is stored under `results/csv/markduplicates_no_table.csv` and will automatically be used as an input when specifying the parameter `--step prepare_recalibration`. 
+ +#### Full samplesheet + +In this example, all possible columns are used including the `sex` and `status` information per patient: + +```bash +patient,sex,status,sample,bam,bai +patient1,XX,0,test_sample,test_md.bam,test_md.bam.bai +patient1,XX,1,tumor_sample,test2_md.bam,test2_md.bam.bai +patient1,XX,1,relapse_sample,test3_md.bam,test3_md.bam.bai +``` + +```bash +patient,sex,status,sample,cram,crai +patient1,XX,0,normal_sample,test_md.cram,test_md.cram.crai +patient1,XX,1,tumor_sample,test2_md.cram,test2_md.cram.crai +patient1,XX,1,relapse_sample,test3_md.cram,test3_md.cram.crai +``` + +### Start with base quality score recalibration (`--step recalibrate`) + +For starting from base quality score recalibration the CSV file must contain at least the columns `patient`, `sample`, `bam`, `bai`, `table` or `patient`, `sample`, `cram`, `crai`, `table` containing the paths to _non-recalibrated CRAM/BAM_ files and the associated recalibration table. + +Example: + +```bash +patient,sample,bam,bai,table +patient1,test_sample,test_mapped.bam,test_mapped.bam.bai,test.table +``` + +```bash +patient,sample,cram,crai,table +patient1,test_sample,test_mapped.cram,test_mapped.cram.crai,test.table +``` + +The Sarek-generated CSV file is stored under `results/csv/markduplicates.csv` and will automatically be used as an input when specifying the parameter `--step recalibrate`.
+ +#### Full samplesheet + +In this example, all possible columns are used including the `sex` and `status` information per patient: + +```bash +patient,sex,status,sample,cram,crai,table +patient1,XX,0,test_sample,test_mapped.cram,test_mapped.cram.crai,test.table +patient1,XX,1,tumor_sample,test2_mapped.cram,test2_mapped.cram.crai,test2.table +patient1,XX,1,relapse_sample,test3_mapped.cram,test3_mapped.cram.crai,test3.table +``` + +### Start with variant calling (`--step variant_calling`) + +For starting from the variant calling step, the CSV file must contain at least the columns `patient`, `sample`, `bam`, `bai` or `patient`, `sample`, `cram`, `crai`. + +Example: + +```bash +patient,sample,bam,bai +patient1,test_sample,test_mapped.bam,test_mapped.bam.bai +``` + +```bash +patient,sample,cram,crai +patient1,test_sample,test_mapped.cram,test_mapped.cram.crai +``` + +The Sarek-generated CSV file is stored under `results/csv/recalibrated.csv` and will automatically be used as an input when specifying the parameter `--step variant_calling`. + +#### Full samplesheet + +In this example, all possible columns are used including the `sex` and `status` information per patient: + +```bash +patient,sex,status,sample,cram,crai +patient1,XX,0,normal_sample,test_mapped.cram,test_mapped.cram.crai +patient1,XX,1,tumor_sample,test2_mapped.cram,test2_mapped.cram.crai +patient1,XX,1,relapse_sample,test3_mapped.cram,test3_mapped.cram.crai +``` + +### Start with annotation (`--step annotate`) + +For starting from the annotation step, the CSV file must contain at least the columns `patient`, `sample`, `vcf`. + +As Sarek will use [bgzip](http://www.htslib.org/doc/bgzip.html) and [tabix](http://www.htslib.org/doc/tabix.html) to compress and index the annotated VCF files, it expects the input VCF files to be sorted and compressed. 
+ +Example: + +```bash +patient,sample,vcf +patient1,test_sample,test.vcf.gz +``` + +The Sarek-generated CSV file is stored under `results/csv/variantcalled.csv` and will automatically be used as an input when specifying the parameter `--step annotate`. + +#### Full samplesheet + +In this example, all possible columns are used including the `variantcaller` information per sample: + +```bash +patient,sample,variantcaller,vcf +test,sample3,strelka,sample3.variants.vcf.gz +test,sample4_vs_sample3,manta,sample4_vs_sample3.diploid_sv.vcf.gz +test,sample4_vs_sample3,manta,sample4_vs_sample3.somatic_sv.vcf.gz +``` + +## Updating the pipeline + +When you launch a pipeline from the command-line with `nextflow run nf-core/sarek -params-file params.yaml`, Nextflow will automatically pull the pipeline code from GitHub and store it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: + +```bash +nextflow pull nf-core/sarek ``` -> **NB:** We specify just the process name i.e. `STAR_ALIGN` in the config file and not the full task name string that is printed to screen in the error message or on the terminal whilst the pipeline is running i.e. `RNASEQ:ALIGN_STAR:STAR_ALIGN`. You may get a warning suggesting that the process selector isn't recognised but you can ignore that if the process name has been specified correctly. This is something that needs to be fixed upstream in core Nextflow. +## Reproducibility + +It is a good idea to specify the pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.
+ +First, go to the [nf-core/sarek releases page](https://github.com/nf-core/sarek/releases) and find the latest version number - numeric only (eg. `3.3.2`). +Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 3.3.2`. Of course, you can switch to another version by changing the number after the `-r` flag. + +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. + +To further assist in reproducibility, you can share and reuse [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. + +> [!TIP] +> If you wish to share such a profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. -### Updating containers +# Core Nextflow arguments -The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released.
However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. +> [!NOTE] +> These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen) -1. Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19) -2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) -3. Create the custom config accordingly: +## `-profile` - * For Docker: +Use this parameter to choose a configuration profile. +Profiles can give configuration presets for different compute environments. + +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. - ```nextflow - process { - withName: PANGOLIN { - container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +> [!IMPORTANT] +> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. - * For Singularity: +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to check if your system is supported, please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). 
- ```nextflow - process { - withName: PANGOLIN { - container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! +They are loaded in sequence, so later profiles can overwrite earlier profiles. - * For Conda: +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment. + +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters +- `docker` + - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` + - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` + - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` + - A generic configuration profile to be used with [Charliecloud](https://charliecloud.io/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). +- `conda` + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. +- `gpu` + - A generic configuration profile which adds necessary flags to `docker` and `singularity` profiles. 
+ +## `-resume` + +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). - ```nextflow - process { - withName: PANGOLIN { - conda = 'bioconda::pangolin=3.0.5' - } - } - ``` +You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. -> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. +## `-c` + +Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. + +## Nextflow memory requirements + +In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. +We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~/.bash_profile`): + +```bash +NXF_OPTS='-Xms1g -Xmx4g' +``` + +Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time.
For most of the pipeline steps, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher resource requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. + +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) sections of the nf-core website. + +### Custom Containers + +In some cases, you may wish to change the container or conda environment used by a pipeline step for a particular tool. By default, nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However, in some cases the pipeline-specified version may be out of date. + +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. + +### Custom Tool Arguments + +A pipeline might not always support every possible argument or option of a particular tool used in the pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. + +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website.
### nf-core/configs @@ -244,449 +486,960 @@ The Nextflow `-bg` flag launches Nextflow in the background, detached from your Alternatively, you can use `screen` / `tmux` or similar tool to create a detached session which you can log back into at a later time. Some HPC setups also allow you to run nextflow within a cluster job submitted your job scheduler (from where it submits more jobs). -## Nextflow memory requirements +# Custom configuration -In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. -We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`): +## Resource requests -```console -NXF_OPTS='-Xms1g -Xmx4g' -``` +Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the pipeline steps, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher resources request (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. -## Troubleshooting +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. -### TSV file +### Custom Containers -> **NB** Delimiter is the tab (`\t`) character, and no header is required +In some cases, you may wish to change the container or conda environment used by a pipeline steps for a particular tool. 
By default, nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However, in some cases the pipeline specified version maybe out of date. -There are different kinds of `TSV` files that can be used as input, depending on the input files available (`FASTQ`, `unmapped BAM`, `recalibrated BAM`...). -The `TSV` file should correspond to the correct step. -For all possible `TSV` files, described in the next sections, here is an explanation of what the columns refer to: +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. -`Sarek` auto-generates `TSV` files for all and for each individual samples, depending of the options specified. +### Custom Tool Arguments -* `subject` designates the subject, it should be the ID of the subject, and it must be unique for each subject, but one subject can have multiple samples (e.g. -normal and tumor) -* `sex` are the sex chromosomes of the subject, (ie `XX`, `XY`...) and will only be used for Copy-Number Variation in a tumor/pair. -* `status` is the status of the measured sample, (`0` for Normal or `1` for Tumor) -* `sample` designates the sample, it should be the ID of the sample (it is possible to have more than one tumor sample for each subject, i.e. 
a tumor and a relapse), it must be unique, but samples can have multiple lanes (which will later be merged) -* `lane` is used when the sample is multiplexed on several lanes, it must be unique for each lane in the same sample (but does not need to be the original lane name), and must contain at least one character -* `fastq1` is the path to the first pair of the `FASTQ` file -* `fastq2` is the path to the second pair of the `FASTQ` file -* `bam` is the path to the `BAM` file -* `bai` is the path to the `BAM` index file -* `recaltable` is the path to the recalibration table -* `mpileup` is the path to the mpileup file +A pipeline might not always support every possible argument or option of a particular tool used in pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. -It is recommended to use the absolute path of the files, but relative path should also work. +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. -If necessary, a tumor sample can be associated to a normal sample as a pair, if specified with the same `subject`and a different `sample`. -An additional tumor sample (such as a relapse for example), can be added if specified with the same `subject` and a different `sample`. +## nf-core/configs -`Sarek` will output results in a different directory for each sample. -If multiple samples are specified in the `TSV` file, `Sarek` will consider all files to be from different samples. -Multiple `TSV` files can be specified if the path is enclosed in quotes. 
+In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this, please test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. + +See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files. + +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). + +## Azure Resource Requests + +To be used with the `azurebatch` profile by specifying the `-profile azurebatch`. +We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. -Output from Variant Calling and/or Annotation will be in a specific directory for each sample (or normal/tumor pair if applicable). +Note that the choice of VM size depends on your quota and the overall workload during the analysis. +For a thorough list, please refer to the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes).
-The `TSV` file to start with the mapping step (`--step mapping`) with paired-end `FASTQs` should contain the columns: +# Troubleshooting & FAQ -`subject sex status sample lane fastq1 fastq2` +## How to test the pipeline -In this example (`example_fastq.tsv`), there are 3 read groups. +When using default parameters only, sarek runs preprocessing and `Strelka`. +This is reflected in the default test profile: -| | | | | | | | -|-|-|-|-|-|-|-| -|SUBJECT_ID|XX|0|SAMPLE_ID|1|/samples/normal1_1.fastq.gz|/samples/normal1_2.fastq.gz| -|SUBJECT_ID|XX|0|SAMPLE_ID|2|/samples/normal2_1.fastq.gz|/samples/normal2_2.fastq.gz| -|SUBJECT_ID|XX|0|SAMPLE_ID|3|/samples/normal3_1.fastq.gz|/samples/normal3_2.fastq.gz| +```bash +nextflow run nf-core/sarek -profile test, --outdir results +``` + +Expected run output: ```bash ---input example_fastq.tsv +[85/6b7739] process > NFCORE_SAREK:SAREK:PREPARE_GENOME:BWAMEM1_INDEX (genome.fasta) [100%] 1 of 1 ✔ +[- ] process > NFCORE_SAREK:SAREK:PREPARE_GENOME:BWAMEM2_INDEX - +[- ] process > NFCORE_SAREK:SAREK:PREPARE_GENOME:DRAGMAP_HASHTABLE - +[22/cf54a8] process > NFCORE_SAREK:SAREK:PREPARE_GENOME:GATK4_CREATESEQUENCEDICTIONARY (genome.fasta) [100%] 1 of 1 ✔ +[- ] process > NFCORE_SAREK:SAREK:PREPARE_GENOME:MSISENSORPRO_SCAN - +[28/dad25a] process > NFCORE_SAREK:SAREK:PREPARE_GENOME:SAMTOOLS_FAIDX (genome.fasta) [100%] 1 of 1 ✔ +[23/3fe964] process > NFCORE_SAREK:SAREK:PREPARE_GENOME:TABIX_DBSNP (dbsnp_146.hg38.vcf) [100%] 1 of 1 ✔ +[- ] process > NFCORE_SAREK:SAREK:PREPARE_GENOME:TABIX_GERMLINE_RESOURCE - +[- ] process > NFCORE_SAREK:SAREK:PREPARE_GENOME:TABIX_KNOWN_SNPS - +[14/26e286] process > NFCORE_SAREK:SAREK:PREPARE_GENOME:TABIX_KNOWN_INDELS (mills_and_1000G.indels.vcf) [100%] 1 of 1 ✔ +[- ] process > NFCORE_SAREK:SAREK:PREPARE_GENOME:TABIX_PON - +[76/04d107] process > NFCORE_SAREK:SAREK:PREPARE_INTERVALS:CREATE_INTERVALS_BED (genome.interval_list) [100%] 1 of 1 ✔ +[d4/f97174] process > 
NFCORE_SAREK:SAREK:PREPARE_INTERVALS:GATK4_INTERVALLISTTOBED (genome) [100%] 1 of 1 ✔ +[70/82ba3c] process > NFCORE_SAREK:SAREK:PREPARE_INTERVALS:TABIX_BGZIPTABIX_INTERVAL_SPLIT (chr22_1-40001) [100%] 1 of 1 ✔ +[d4/c2d0c4] process > NFCORE_SAREK:SAREK:PREPARE_INTERVALS:TABIX_BGZIPTABIX_INTERVAL_COMBINED (genome) [100%] 1 of 1 ✔ +[- ] process > NFCORE_SAREK:SAREK:CONVERT_FASTQ_INPUT:SAMTOOLS_VIEW_MAP_MAP - +[- ] process > NFCORE_SAREK:SAREK:CONVERT_FASTQ_INPUT:SAMTOOLS_VIEW_UNMAP_UNMAP - +[- ] process > NFCORE_SAREK:SAREK:CONVERT_FASTQ_INPUT:SAMTOOLS_VIEW_UNMAP_MAP - +[- ] process > NFCORE_SAREK:SAREK:CONVERT_FASTQ_INPUT:SAMTOOLS_VIEW_MAP_UNMAP - +[- ] process > NFCORE_SAREK:SAREK:CONVERT_FASTQ_INPUT:SAMTOOLS_MERGE_UNMAP - +[- ] process > NFCORE_SAREK:SAREK:CONVERT_FASTQ_INPUT:COLLATE_FASTQ_UNMAP - +[- ] process > NFCORE_SAREK:SAREK:CONVERT_FASTQ_INPUT:COLLATE_FASTQ_MAP - +[- ] process > NFCORE_SAREK:SAREK:CONVERT_FASTQ_INPUT:CAT_FASTQ - +[c4/f59e5a] process > NFCORE_SAREK:SAREK:FASTQC (test-test_L1) [100%] 1 of 1 ✔ +[0b/c5a999] process > NFCORE_SAREK:SAREK:FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP:BWAMEM1_MEM (test) [100%] 1 of 1 ✔ +[- ] process > NFCORE_SAREK:SAREK:FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP:BWAMEM2_MEM - +[- ] process > NFCORE_SAREK:SAREK:FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP:DRAGMAP_ALIGN - +[c7/664cd1] process > NFCORE_SAREK:SAREK:BAM_MARKDUPLICATES:GATK4_MARKDUPLICATES (test) [100%] 1 of 1 ✔ +[13/bc73b6] process > NFCORE_SAREK:SAREK:BAM_MARKDUPLICATES:INDEX_MARKDUPLICATES (test) [100%] 1 of 1 ✔ +[2a/99608e] process > NFCORE_SAREK:SAREK:BAM_MARKDUPLICATES:CRAM_QC_MOSDEPTH_SAMTOOLS:SAMTOOLS_STATS (test) [100%] 1 of 1 ✔ +[f2/0420ca] process > NFCORE_SAREK:SAREK:BAM_MARKDUPLICATES:CRAM_QC_MOSDEPTH_SAMTOOLS:MOSDEPTH (test) [100%] 1 of 1 ✔ +[- ] process > NFCORE_SAREK:SAREK:CRAM_TO_BAM - +[eb/46945a] process > NFCORE_SAREK:SAREK:BAM_BASERECALIBRATOR:GATK4_BASERECALIBRATOR (test) [100%] 1 of 1 ✔ +[- ] process > NFCORE_SAREK:SAREK:BAM_BASERECALIBRATOR:GATK4_GATHERBQSRREPORTS - 
+[ec/2377d4] process > NFCORE_SAREK:SAREK:BAM_APPLYBQSR:GATK4_APPLYBQSR (test) [100%] 1 of 1 ✔ +[- ] process > NFCORE_SAREK:SAREK:BAM_APPLYBQSR:CRAM_MERGE_INDEX_SAMTOOLS:MERGE_CRAM - +[88/3af664] process > NFCORE_SAREK:SAREK:BAM_APPLYBQSR:CRAM_MERGE_INDEX_SAMTOOLS:INDEX_CRAM (test) [100%] 1 of 1 ✔ +[f4/828fde] process > NFCORE_SAREK:SAREK:CRAM_QC_RECAL:SAMTOOLS_STATS (test) [100%] 1 of 1 ✔ +[fb/a9d66f] process > NFCORE_SAREK:SAREK:CRAM_QC_RECAL:MOSDEPTH (test) [100%] 1 of 1 ✔ +[- ] process > NFCORE_SAREK:SAREK:CRAM_TO_BAM_RECAL - +[ef/026185] process > NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_SINGLE_STRELKA:STRELKA_SINGLE (test) [100%] 1 of 1 ✔ +[- ] process > NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_SINGLE_STRELKA:MERGE_STRELKA - +[- ] process > NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_SINGLE_STRELKA:MERGE_STRELKA_GENOME - +[- ] process > NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_TUMOR_ONLY_ALL:BAM_VARIANT_CALLING_SINGLE_STRELKA:STRELKA_SINGLE - +[- ] process > NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_TUMOR_ONLY_ALL:BAM_VARIANT_CALLING_SINGLE_STRELKA:MERGE_STRELKA - +[- ] process > NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_TUMOR_ONLY_ALL:BAM_VARIANT_CALLING_SINGLE_STRELKA:MERGE_STRELKA_GENOME - +[- ] process > NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_SOMATIC_STRELKA:STRELKA_SOMATIC - +[- ] process > NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_SOMATIC_STRELKA:MERGE_STRELKA_INDELS - +[- ] process > NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_SOMATIC_STRELKA:MERGE_STRELKA_SNVS - +[bc/f3f5cf] process > NFCORE_SAREK:SAREK:VCF_QC_BCFTOOLS_VCFTOOLS:BCFTOOLS_STATS (test) [100%] 1 of 1 ✔ +[21/8d4f02] process > NFCORE_SAREK:SAREK:VCF_QC_BCFTOOLS_VCFTOOLS:VCFTOOLS_TSTV_COUNT (test) [100%] 1 of 1 ✔ +[36/957fba] process > NFCORE_SAREK:SAREK:VCF_QC_BCFTOOLS_VCFTOOLS:VCFTOOLS_TSTV_QUAL (test) [100%] 1 of 1 ✔ 
+[70/a8e064] process > NFCORE_SAREK:SAREK:VCF_QC_BCFTOOLS_VCFTOOLS:VCFTOOLS_SUMMARY (test) [100%] 1 of 1 ✔ +[36/e35b1b] process > NFCORE_SAREK:SAREK:CUSTOM_DUMPSOFTWAREVERSIONS (1) [100%] 1 of 1 ✔ +[3f/3c3356] process > NFCORE_SAREK:SAREK:MULTIQC [100%] 1 of 1 ✔ +-[nf-core/sarek] Pipeline completed successfully- +Completed at: 09-Jun-2023 13:46:31 +Duration : 1m 50s +CPU hours : (a few seconds) +Succeeded : 27 ``` -Or, for a normal/tumor pair: +The pipeline comes with a number of possible paths and tools that can be used. + +Due to the small test data size, unfortunately not everything can be tested from top-to-bottom, but this is often done by utilizing the pipeline's `--step` parameter. + +For more extensive testing purposes, we have the `test_cache` profile that contains the same data, but on which the path to the reference and input files can be changed using the `--test_data_base` parameter. + +Annotation is generally tested separately from the remaining workflow, since we use references for `C.elegans`, while the remaining tests are run on downsampled human data. + +```bash +nextflow run nf-core/sarek -profile test_cache,<container> --outdir results --tools snpeff --step annotate +``` + +If you are interested in any of the other tests that are run on every code change or would like to run them yourself, you can take a look at `tests/<test_name>.yml`. +For each entry the respective nextflow command run and the expected output are specified.
+ +Some of the currently available test profiles: + +| Test profile | Run command | +| :-------------- | :------------------------------------------------------------------------------------ | +| annotation | `nextflow run main.nf -profile test_cache,annotation,docker --tools snpeff,vep,merge` | +| no_intervals | `nextflow run main.nf -profile test_cache,no_intervals,docker` | +| targeted | `nextflow run main.nf -profile test_cache,targeted,docker` | +| tools_germline | `nextflow run main.nf -profile test_cache,tools_germline,docker --tools strelka` | +| tools_tumoronly | `nextflow run main.nf -profile test_cache,tools_tumoronly,docker --tools mutect2` | +| tools_somatic | `nextflow run main.nf -profile test_cache,tools_somatic,docker --tools strelka` | +| trimming | `nextflow run main.nf -profile test_cache,trim_fastq,docker` | +| umi | `nextflow run main.nf -profile test_cache,umi,docker` | +| use_gatk_spark | `nextflow run main.nf -profile test_cache,use_gatk_spark,docker` | + +If you are interested in any of the other profiles that are used, you can take a look at `conf/test/.config`. + +## How can the different steps be used + +Sarek can be started at different points in the analysis by setting the parameter `--step`. Once started at a certain point, the pipeline runs through all the following steps without additional intervention. For example, when starting from `--step mapping` (set by default) and `--tools strelka,vep`, the input reads will be aligned, duplicate marked, recalibrated, variant called with Strelka, and finally VEP will annotate the called variants. + +## Which variant calling tool is implemented for which data type? + +This list is by no means exhaustive and it will depend on the specific analysis you would like to run. This is a suggestion based on the individual docs of the tools specifically for human genomes and a garden-variety sequencing run as well as what has been added to the pipeline.
-In this example (`example_pair_fastq.tsv`), there are 3 read groups for the normal sample and 2 for the tumor sample. +| Tool | WGS | WES |  Panel |  Germline | Tumor-Only | Somatic (Tumor-Normal) | +| :------------------------------------------------------------------------------------------------------ | :-: | :-: | :----: | :-------: | :--------: | :--------------------: | +| [DeepVariant](https://github.com/google/deepvariant) | x | x | x | x | - | - | +| [FreeBayes](https://github.com/ekg/freebayes) | x | x | x | x | x | x | +| [GATK HaplotypeCaller](https://gatk.broadinstitute.org/hc/en-us/articles/5358864757787-HaplotypeCaller) | x | x | x | x | - | - | +| [GATK Mutect2](https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2) | x | x | x | - | x | x | +| [lofreq](https://github.com/CSB5/lofreq) | x | x | x | - | x | - | +| [mpileup](https://www.htslib.org/doc/samtools-mpileup.html) | x | x | x | x | x | - | +| [MuSE](https://github.com/wwylab/MuSE) | x | x | x | - | - | x | +| [Strelka](https://github.com/Illumina/strelka) | x | x | - | - | - | x | +| [Manta](https://github.com/Illumina/manta) | x | x | x | x | x | x | +| [indexcov](https://github.com/brentp/goleft/tree/master/indexcov) | x | - | - | x | - | x | +| [TIDDIT](https://github.com/SciLifeLab/TIDDIT) | x | x | x | x | x | x | +| [ASCAT](https://github.com/VanLoo-lab/ascat) | x | x | - | - | - | x | +| [CNVKit](https://cnvkit.readthedocs.io/en/stable/) | x | x | - | x | x | x | +| [Control-FREEC](https://github.com/BoevaLab/FREEC) | x | x | x | - | x | x | +| [MSIsensor2](https://github.com/niu-lab/msisensor2) | x | x | x | - | X | - | +| [MSIsensorPro](https://github.com/xjtu-omics/msisensor-pro) | x | x | x | - | - | x | +| [Varlociraptor](https://varlociraptor.github.io/landing/) | x | x | x | x | x | x | -| | | | | | | | -|-|-|-|-|-|-|-| -|SUBJECT_ID|XX|0|SAMPLE_ID1|1|/samples/normal1_1.fastq.gz|/samples/normal1_2.fastq.gz| 
-|SUBJECT_ID|XX|0|SAMPLE_ID1|2|/samples/normal2_1.fastq.gz|/samples/normal2_2.fastq.gz| -|SUBJECT_ID|XX|0|SAMPLE_ID1|3|/samples/normal3_1.fastq.gz|/samples/normal3_2.fastq.gz| -|SUBJECT_ID|XX|1|SAMPLE_ID2|1|/samples/tumor1_1.fastq.gz|/samples/tumor1_2.fastq.gz| -|SUBJECT_ID|XX|1|SAMPLE_ID2|2|/samples/tumor2_1.fastq.gz|/samples/tumor2_2.fastq.gz| +## How to run ASCAT with whole-exome sequencing data? + +ASCAT runs out of the box on whole genome sequencing data using iGenomes resources. While the ASCAT implementation in sarek is capable of running with whole-exome sequencing data, the needed references are currently not provided with the igenomes.config. According to the [developers](https://github.com/VanLoo-lab/ascat/issues/97) of ASCAT, loci and allele files (one file per chromosome) can be downloaded directly from the [Battenberg repository](https://ora.ox.ac.uk/objects/uuid:08e24957-7e76-438a-bd38-66c48008cf52). + +Please note that: + +- Row names (for GC and RT correction files) should be `${chr}_${position}` (there is no SNP/probe ID for HTS data). +- All row names in GC and RT correction files should also appear in the loci files +- Loci and allele files must contain the same set of SNPs +- ASCAT developers strongly recommend using a BED file for WES/TS data. This prevents considering SNPs covered by off-target reads that would add noise to log/BAF tracks. +- The total number of GC correction loci in a sample must be at least 10% of the number of loci with logR values. If the number of GC correction loci is too small compared to the total number of loci, ASCAT will throw an error. + +From 'Reference files' https://github.com/VanLoo-lab/ascat: + +> For WES and targeted sequencing, we recommend using the reference files (loci, allele and logR correction files) as part of the Battenberg package. Because they require a high-resolution input, our reference files for WGS are not suitable for WES and targeted sequencing. 
For WES, loci and allele files from the Battenberg package can be fed into ascat.prepareHTS. For targeted sequencing, allele files from the Battenberg package can be fed into ascat.prepareTargetedSeq, which will generate cleaned loci and allele files that can be fed into ascat.prepareHTS. + +### How to generate ASCAT resources for exome or targeted sequencing + +1. Fetch the GC content correction and replication timing (RT) correction files from the [Dropbox links provided by the ASCAT developers](https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS) and intersect the SNP coordinates with the exome target coordinates. If the target file has 'chr' prefixes, make a copy with these removed first. Extract the GC and RT information for only the on target SNPs and zip the results. ```bash ---input example_pair_fastq.tsv +sed -e 's/chr//' targets_with_chr.bed > targets.bed + +for t in GC RT +do + unzip ${t}_G1000_hg38.zip + + cut -f 1-3 ${t}_G1000_hg38.txt > ascat_${t}_snps_hg38.txt + tail -n +2 ascat_${t}_snps_hg38.txt | awk '{ print $2 "\t" $3-1 "\t" $3 "\t" $1 }' > ascat_${t}_snps_hg38.bed + bedtools intersect -a ascat_${t}_snps_hg38.bed -b targets.bed | awk '{ print $1 "_" $3 }' > ascat_${t}_snps_on_target_hg38.txt + + head -n 1 ${t}_G1000_hg38.txt > ${t}_G1000_on_target_hg38.txt + grep -f ascat_${t}_snps_on_target_hg38.txt ${t}_G1000_hg38.txt >> ${t}_G1000_on_target_hg38.txt + zip ${t}_G1000_on_target_hg38.zip ${t}_G1000_on_target_hg38.txt + + rm ${t}_G1000_hg38.zip +done ``` -#### --input <uBAM> --step mapping +2. Download the Battenberg 1000G loci and alleles files. The steps below follow downloading from the [Battenberg repository at the Oxford University Research Archive](https://ora.ox.ac.uk/objects/uuid:08e24957-7e76-438a-bd38-66c48008cf52). The files are also available via Dropbox links from the same page as the GC and RT correction files above. 
-The `TSV` file to start with the mapping step (`--step mapping`) with `unmapped BAM` files should contain the columns: +```bash +wget https://ora.ox.ac.uk/objects/uuid:08e24957-7e76-438a-bd38-66c48008cf52/files/rt435gd52w +mv rt435gd52w battenberg.zip +tar xf battenberg.zip + +unzip 1000G_loci_hg38_chr.zip +cd 1000G_loci_hg38 +mkdir battenberg_alleles_on_target_hg38 +mv *allele* battenberg_alleles_on_target_hg38/ +mkdir battenberg_loci_on_target_hg38 +mv *loci* battenberg_loci_on_target_hg38/ +``` -`subject sex status sample lane bam` +3. Copy the `targets_with_chr.bed` and `GC_G1000_on_target_hg38.txt` files into the newly created `battenberg_loci_on_target_hg38` folder before running the next set of steps. ASCAT generates a list of GC correction loci with sufficient coverage in a sample, then intersects that with the list of all loci with tumour logR values in that sample. If the intersection is <10% the size of the latter, it will fail with an error. Because the Battenberg loci/allele sets are very dense, subsetting to on-target regions still leaves too many loci. This script ensures that all SNPs with GC correction information are included in the loci list, plus a random sample of another 30% of all on-target loci. You may need to vary this proportion depending on your set of targets. A good rule of thumb is that the size of your GC correction loci list should be about 15% the size of your total loci list. This allows for a margin of error. -In this example (`example_ubam.tsv`), there are 3 read groups. +### 'chr'-based versus non 'chr'-based reference -| | | | | | | -|-|-|-|-|-|-| -|SUBJECT_ID|XX|0|SAMPLE_ID|1|/samples/normal_1.bam| -|SUBJECT_ID|XX|0|SAMPLE_ID|2|/samples/normal_2.bam| -|SUBJECT_ID|XX|0|SAMPLE_ID|3|/samples/normal_3.bam| +Please note that loci files provided by the ASCAT developers (https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WES) are not 'chr'-based (chromosome names are '1', '2', '3', etc. and not 'chr1', 'chr2', 'chr3', etc.).
If your BAMs are 'chr'-based, you will need to add 'chr': ```bash ---input example_ubam.tsv +for i in {1..22} X; + do sed -i 's/^/chr/' G1000_loci_hg19_chr${i}.txt; +done ``` -Or, for a normal/tumor pair: +ASCAT will internally remove 'chr' so the other files (allele, GC correction and RT correction) should not be modified and chrom_names (ascat.prepareHTS) should be c(1:22,'X'). + +If using ASCAT provided references: + +```bash + +cd .../G1000_lociAll_hg38_unzipped/G1000_lociAll_hg38 + +# Function to check and correct 'chr' prefix +check_and_correct_chr_prefix() { + local file=$1 + local chr_number=$2 + + # Check if file exists + if [ ! -f "$file" ]; then + echo "Error: File $file not found." + exit 1 + fi + + # Check first line of the file + first_line=$(head -n 1 "$file") + + if [[ $first_line == chr${chr_number}* ]]; then + echo "File $file already has correct 'chr' prefix. No changes needed." + elif [[ $first_line == chrchr${chr_number}* ]]; then + echo "File $file has duplicate 'chr' prefix. Correcting..." + sed -i 's/^chrchr/chr/' "$file" + elif [[ $first_line == ${chr_number}* ]]; then + echo "File $file is missing 'chr' prefix. Adding..." + sed -i 's/^/chr/' "$file" + else + echo "Error: Unexpected format in $file. Please check manually."
+ exit 1 + fi +} + +# Check and correct 'chr' prefix for each loci file +for i in {1..22} X; do + check_and_correct_chr_prefix "G1000_loci_hg38_chr${i}.txt" "${i}" +done + +for i in {1..22} X +do + # Generate BED file from the tailored loci set + awk '{ print $1 "\t" $2-1 "\t" $2 }' G1000_loci_hg38_chr${i}.txt > chr${i}.bed + + # Extract relevant GC content data for this chromosome + grep "^chr${i}_" GC_G1000_on_target_hg38.txt > chr${i}.txt + + # Intersect BED file with target regions to find loci on target + bedtools intersect -a chr${i}.bed -b targets_with_chr.bed | awk '{ print $1 "_" $3 }' > chr${i}_on_target.txt + + # Calculate the number of lines needed for random sampling (30% of total) + n=$(wc -l < chr${i}_on_target.txt) + count=$((n * 3 / 10)) + + # Get loci that are both on target and match the GC content data + grep -xf chr${i}.txt chr${i}_on_target.txt > chr${i}.temp -In this example (`example_pair_ubam.tsv`), there are 3 read groups for the normal sample and 2 for the tumor sample. 
+ # Add random subset of on-target loci to the list + shuf -n $count chr${i}_on_target.txt >> chr${i}.temp -| | | | | | | -|-|-|-|-|-|-| -|SUBJECT_ID|XX|0|SAMPLE_ID1|1|/samples/normal_1.bam| -|SUBJECT_ID|XX|0|SAMPLE_ID1|2|/samples/normal_2.bam| -|SUBJECT_ID|XX|0|SAMPLE_ID1|3|/samples/normal_3.bam| -|SUBJECT_ID|XX|1|SAMPLE_ID2|1|/samples/tumor_1.bam| -|SUBJECT_ID|XX|1|SAMPLE_ID2|2|/samples/tumor_2.bam| + # Sort, remove duplicates, and format output + sort -n -k2 -t '_' chr${i}.temp | uniq | awk 'BEGIN { FS="_" } ; { print $1 "\t" $2 }' > battenberg_loci_on_target_hg38_chr${i}.txt +done + +# Compress the resulting loci files into a zip archive +zip battenberg_loci_on_target_hg38.zip battenberg_loci_on_target_hg38_chr*.txt + +``` + +If using Battenberg provided references: ```bash ---input example_pair_ubam.tsv +cd battenberg_loci_on_target_hg38/ +rm *chrstring* +rm 1kg.phase3.v5a_GRCh38nounref_loci_chr23.txt +for i in {1..22} X +do + awk '{ print $1 "\t" $2-1 "\t" $2 }' 1kg.phase3.v5a_GRCh38nounref_loci_chr${i}.txt > chr${i}.bed + grep "^${i}_" GC_G1000_on_target_hg38.txt | awk '{ print "chr" $1 }' > chr${i}.txt + bedtools intersect -a chr${i}.bed -b targets_with_chr.bed | awk '{ print $1 "_" $3 }' > chr${i}_on_target.txt + n=`wc -l chr${i}_on_target.txt | awk '{ print $1 }'` + count=$((n * 3 / 10)) + grep -xf chr${i}.txt chr${i}_on_target.txt > chr${i}.temp + shuf -n $count chr${i}_on_target.txt >> chr${i}.temp + sort -n -k2 -t '_' chr${i}.temp | uniq | awk 'BEGIN { FS="_" } ; { print $1 "\t" $2 }' > battenberg_loci_on_target_hg38_chr${i}.txt +done +zip battenberg_loci_on_target_hg38.zip battenberg_loci_on_target_hg38_chr*.txt ``` -#### --input <TSV> --step prepare_recalibration +4. Extract the alleles for the same set of SNPs. Uses a short R script defined below. -To start from the preparation of the recalibration step (`--step prepare_recalibration`), a `TSV` file needs to be given as input containing the paths to the `non-recalibrated BAM` files. 
-The `Sarek`-generated `TSV` file is stored under `results/Preprocessing/TSV/duplicates_marked_no_table.tsv` and will automatically be used as an input when specifying the parameter `--step prepare_recalibration`. +```bash +cd ../battenberg_alleles_on_target_hg38/ +rm 1kg.phase3.v5a_GRCh38nounref_allele_index_chr23.txt +for i in {1..22} X +do + Rscript intersect_ascat_alleles.R ../battenberg_loci_on_target_hg38/battenberg_loci_on_target_hg38_chr${i}.txt \ + 1kg.phase3.v5a_GRCh38nounref_allele_index_chr${i}.txt battenberg_alleles_on_target_hg38_chr${i}.txt +done +zip battenberg_alleles_on_target_hg38.zip battenberg_alleles_on_target_hg38_chr*.txt +``` -The `TSV` contains the following columns: +Rscript `intersect_ascat_alleles.R` -`subject sex status sample bam bai` +```bash +#!/usr/bin/env Rscript -| | | | | | | -|-|-|-|-|-|-| -|SUBJECT_ID|XX|0|SAMPLE_ID|/samples/normal.md.bam|/samples/normal.md.bai| +args = commandArgs(trailingOnly=TRUE) -Or, for a normal/tumor pair: +loci = read.table(args[1], header=F, sep="\t", stringsAsFactors=F) +alleles = read.table(args[2], header=T, sep="\t", stringsAsFactors=F) -| | | | | | | -|-|-|-|-|-|-| -|SUBJECT_ID|XX|0|SAMPLE_ID1|/samples/normal.md.bam|/samples/normal.md.bai| -|SUBJECT_ID|XX|1|SAMPLE_ID2|/samples/tumor.md.bam|/samples/tumor.md.bai| +i = intersect(loci$V2, alleles$position) -#### --input <TSV> --step prepare_recalibration --skip_markduplicates +out = subset(alleles, alleles$position %in% i) +write.table(out, args[3], col.names=T, row.names=F, quote=F, sep="\t") +``` -The `Sarek`-generated `TSV` file is stored under `results/Preprocessing/TSV/mapped.tsv` and will automatically be used as an input when specifying the parameter `--step prepare_recalibration --skip_markduplicates`. -The `TSV` file contains the same columns, but the content is slightly different: +5. Move or copy all of the zip files you've created to a suitable location. Specify these in your parameters, e.g. 
-| | | | | | | -|-|-|-|-|-|-| -|SUBJECT_ID|XX|0|SAMPLE_ID|/samples/normal.bam|/samples/normal.bai| +```json +{ + "ascat_alleles": "/path/to/battenberg_alleles_on_target_hg38.zip", + "ascat_loci": "/path/to/battenberg_loci_on_target_hg38.zip", + "ascat_loci_gc": "/path/to/GC_G1000_on_target_hg38.zip", + "ascat_loci_rt": "/path/to/RT_G1000_on_target_hg38.zip" +} +``` -Or, for a normal/tumor pair: +## What are the bwa, bwa-mem2 and sentieon bwa mem parameters? -| | | | | | | -|-|-|-|-|-|-| -|SUBJECT_ID|XX|0|SAMPLE_ID1|/samples/normal.bam|/samples/normal.bai| -|SUBJECT_ID|XX|1|SAMPLE_ID2|/samples/tumor.bam|/samples/tumor.bai| +For mapping, sarek follows the parameter suggestions provided in this [paper](https://www.nature.com/articles/s41467-018-06159-4): -#### --input <TSV> --step recalibrate +`-K 100000000` : for deterministic pipeline results, for more info see [here](https://github.com/CCDG/Pipeline-Standardization/issues/2) -To start from the recalibrate step (`--step recalibrate`), a `TSV` file needs to be given as input containing the paths to the `non-recalibrated BAM` file and the associated recalibration table. -The `Sarek`-generated `TSV` file is stored under `results/Preprocessing/TSV/duplicates_marked.tsv` and will automatically be used as an input when specifying the parameter `--step recalibrate`. +`-Y`: force soft-clipping rather than default hard-clipping of supplementary alignments -The `TSV` contains the following columns: +In addition, currently the mismatch penalty for reads with tumor status in the sample sheet are mapped with a mismatch penalty of `-B 3`. 
-`subject sex status sample bam bai recaltable` +## How to manage scatter/gathering (parallelization with-in each sample) -| | | | | | | | -|-|-|-|-|-|-|-| -|SUBJECT_ID|XX|0|SAMPLE_ID|/samples/normal.md.bam|/samples/normal.md.bai|/samples/normal.recal.table| +While Nextflow ensures all samples are run in parallel, the pipeline can split input files for each sample into smaller chunks which are processed in parallel. +This speeds up analysis for individual chunks, but might occupy more storage space. -Or, for a normal/tumor pair: +Therefore, the different scatter/gather options can be set by the user: -| | | | | | | | -|-|-|-|-|-|-|-| -|SUBJECT_ID|XX|0|SAMPLE_ID1|/samples/normal.md.bam|/samples/normal.md.bai|/samples/normal.recal.table| -|SUBJECT_ID|XX|1|SAMPLE_ID2|/samples/tumor.md.bam|/samples/tumor.md.bai|/samples/tumor.recal.table| +### Split Fastq files -#### --input <TSV> --step recalibrate --skip_markduplicates +By default, the input fastq files are split into smaller chunks with FASTP, mapped in parallel, and then merged and duplicate marked. This can be customized by setting the parameter `--split_fastq`. +This parameter determines how many reads are within each split. Setting it to `0` will turn off any splitting and only one mapping process is run per input fastq file. -The `Sarek`-generated `TSV` file is stored under `results/Preprocessing/TSV/mapped_no_duplicates_marked.tsv` and will automatically be used as an input when specifying the parameter `--step recalibrate --skip_markduplicates`. -The `TSV` file contains the same columns, but the content is slightly different: +> FastP creates as many chunks as CPUs are specified (by default 12) and subdivides them further, if the number of reads in a chunk is larger than the value specified in `--split_fastq`. Thus, the parameter `--split_fastq` is an upper bound, e.g. if 1/12th of the Fastq file exceeds the provided value another fastq file will be generated.
-| | | | | | | | -|-|-|-|-|-|-|-| -|SUBJECT_ID|XX|0|SAMPLE_ID|/samples/normal.bam|/samples/normal.bai|/samples/normal.recal.table| +### Intervals for Base Quality Score Recalibration and Variantcalling -Or, for a normal/tumor pair: +The pipeline can parallelize base quality score recalibration and variant calling across genomic chunks of roughly similar sizes. +For this, a bed file containing genomic regions of interest is used, it's the intervals file. +By default, the intervals file for WGS used is the one provided by GATK (details [here](https://gatk.broadinstitute.org/hc/en-us/articles/360035889551-When-should-I-restrict-my-analysis-to-specific-intervals-)). +When running targeted analysis, it is recommended to use the bed file containing the targeted regions. -| | | | | | | | -|-|-|-|-|-|-|-| -|SUBJECT_ID|XX|0|SAMPLE_ID1|/samples/normal.bam|/samples/normal.bai|/samples/normal.recal.table| -|SUBJECT_ID|XX|1|SAMPLE_ID2|/samples/tumor.bam|/samples/tumor.bai|/samples/tumor.recal.table| +The amount of scatter/gathering can be customized by adjusting the parameter `--nucleotides_per_second`. -#### --input <TSV> --step variant_calling +> **NB:** The _same_ intervals are processed regardless of the number of groups. The number of groups however determines over how many compute nodes the analysis is scattered on. -To start from the variant calling step (`--step variant_calling`), a `TSV` file needs to be given as input containing the paths to the `recalibrated BAM` file and the associated index. -The `Sarek`-generated `TSV` file is stored under `results/Preprocessing/TSV/recalibrated.tsv` and will automatically be used as an input when specifying the parameter `--step variant_calling`. +The default value is `200000`, increasing this value will _reduce_ the number of groups that are processed in parallel. +Generally, smaller numbers of groups (each group has more regions), the slower the processing, and less storage space is consumed. 
+In particular, in cloud computing settings it is often advisable to reduce the number of groups to be run in parallel to reduce data staging steps. -The `TSV` file should contain the columns: +## How to create a panel-of-normals for Mutect2 -`subject sex status sample bam bai` +For a detailed tutorial on how to create a panel-of-normals, see [here](https://gatk.broadinstitute.org/hc/en-us/articles/360035531132). -Here is an example for two samples from the same subject: +## How to use varlociraptor -| | | | | | | -|-|-|-|-|-|-| -|SUBJECT_ID|XX|0|SAMPLE_ID|/samples/normal.recal.bam|/samples/normal.recal.bai| +You will need to add another column called `contamination` to the samplesheet for tumor-only or somatic variant calling. There you should add the contamination fraction of the tumor sample. This is `1 - purity` or `1 - tumor_cell_content`. If you do not have access to that information for your samples, put in a reasonable approximation by literature search for the tumor type you are working with. -Or, for a normal/tumor pair: +Varlociraptor allows the usage of different scenario files, a few examples can be found in the [scenario catalog](https://varlociraptor.github.io/varlociraptor-scenarios/landing/). Currently only scenarios that have information on "normal" (germline case), "normal" and "tumor" (somatic and tumor-only case) are supported. You can use your own scenario file by adding it to the run command with `--varlociraptor_scenario_germline `, `--varlociraptor_scenario_somatic ` or `--varlociraptor_scenario_tumor_only `.
-| | | | | | | -|-|-|-|-|-|-| -|SUBJECT_ID|XX|0|SAMPLE_ID1|/samples/normal.recal.bam|/samples/normal.recal.bai| -|SUBJECT_ID|XX|1|SAMPLE_ID2|/samples/tumor.recal.bam|/samples/tumor.recal.bai| +You can control the number of chunks that the candidate VCF file is split into by `--varlociraptor_chunk_size `; it is set to a reasonable default (15), but more chunks might aid in accelerating your workflow run if you can run more processes in parallel. -#### --input <TSV> --step Control-FREEC +## Spark related issues -To start from the Control-FREEC step (`--step Control-FREEC`), a `TSV` file needs to be given as input containing the paths to the mpileup files. -The `Sarek`-generated `TSV` file is stored under `results/VariantCalling/TSV/control-freec_mpileup.tsv` and will automatically be used as an input when specifying the parameter `--step Control-FREEC`. +If you have problems running processes that make use of Spark such as `MarkDuplicates`, +you are probably experiencing issues with the limit of open files in your system. +You can check your current limit by typing the following: -The `TSV` file should contain the columns: +```bash +ulimit -n +``` -`subject sex status sample mpileup` +The default limit size is usually 1024, which is quite low for running Spark jobs. +In order to increase the size limit permanently you can: -Here is an example for one normal/tumor pair from one subjects: +Edit the file `/etc/security/limits.conf` and add the lines: -| | | | | | -|-|-|-|-|-| -|SUBJECT_ID|XX|0|SAMPLE_ID1|/samples/normal.pileup| -|SUBJECT_ID|XX|1|SAMPLE_ID2|/samples/tumor.pileup| +```bash +* soft nofile 65535 +* hard nofile 65535 +``` -### --input <sample/> --step mapping +Edit the file `/etc/sysctl.conf` and add the line: -Use this to specify the location to a directory with `FASTQ` files for the `mapping` step of a single germline sample only.
-For example: +```bash +fs.file-max = 65535 +``` + +Edit the file `/etc/sysconfig/docker` and add the new limits to OPTIONS like this: ```bash ---input +OPTIONS="--default-ulimit nofile=65535:65535" ``` -> **NB** All of the found `FASTQ` files are considered to belong to the same sample. +Re-start your session. + +Note that the way to increase the open file limit in your system may be slightly different or require additional steps. -The input folder, containing the `FASTQ` files for one subject (ID) should be organized into one sub-folder for every sample. -The given directory is searched recursively for `FASTQ` files that are named `*_R1_*.fastq.gz`, and a matching pair with the same name except `_R2_` instead of `_R1_` is expected to exist alongside. -All `FASTQ` files for that sample should be collected here. +If you run into errors similar to the one shown below: -```text -ID -+--sample1 -+------sample1___lane1_R1_1000.fastq.gz -+------sample1___lane1_R2_1000.fastq.gz -+------sample1___lane2_R1_1000.fastq.gz -+------sample1___lane2_R2_1000.fastq.gz -+--sample2 -+------sample2___lane1_R1_1000.fastq.gz -+------sample2___lane1_R2_1000.fastq.gz -+--sample3 -+------sample3___lane1_R1_1000.fastq.gz -+------sample3___lane1_R2_1000.fastq.gz -+------sample3___lane2_R1_1000.fastq.gz -+------sample3___lane2_R2_1000.fastq.gz +```bash +Command error: + [E::hts_idx_push] Unsorted positions on sequence #16: 58528620 followed by 58528490 + [E::sam_index] Read 'LH00271:69:2237HHLT4:7:1101:1000:11758' with ref_name='chr16', ref_length=90338345, flags=163, pos=58528490 cannot be indexed samtools index: failed to create index for "sample_19.sorted.bam" ``` -`FASTQ` filename structure: +Please be aware that `--use_gatk_spark` is not compatible with `--save_output_as_bam --save_mapped` because merging the reads to export them to bam files only works when they are coordinate sorted - spark works with name-sorting the reads.
+ +## How to handle Unique Molecular Identifiers (UMIs) + +Unique Molecular Identifiers (UMIs) are used to identify which reads came from the same original DNA molecule prior to any amplification steps. This is important when sequencing to a high depth on targeted loci, as the likelihood of having the same positions (start/end in the case of paired-end reads) for reads coming from distinct molecules increases with depth. +They may be used to generate consensus reads, where if two or more reads are considered to be part of the same group a novel read is created based on averaging over the individual reads, or solely to help distinguish while marking or removing duplicates. + +Depending on the precise library preparation method the UMIs may exist in several different places. They may be within the read structure (R1 and/or R2 for paired ends), or they may have been in the index reads for Illumina sequencing. If the UMIs were in the index reads, then sarek can only process these UMIs if they have been already transferred to the read header, for instance using the OverrideCycles option inside `bclconvert`; this may be specified by using the option `--umi_in_read_header true`. + +As an example: if your reads contain a UMI only on the forward read, the string can only represent one structure (i.e. "2M11S+T"); should your reads contain a UMI on both reads, the string will contain two structures separated by a blank space (i.e. "2M11S+T 2M11S+T"); should your reads contain a UMI only on the reverse read, your structure must represent the template only for the forward read and template plus UMI for the reverse read (i.e. +T 12M11S+T). Please refer to [fgbio documentation](https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures) for more details, as providing the correct structure is essential and specific to the UMI kit used. This structure can be passed to `--umi_read_structure`, and will enable the fgbio consensus read generation as detailed below.
-* `____R1_.fastq.gz` and -* `____R2_.fastq.gz` +Alternatively, the tool `fastp` may be used to extract UMIs from the template read by setting the parameters `--umi_loc` and `--umi_len` (and optionally `--umi_base_skip`) as [detailed in its documentation](https://github.com/OpenGene/fastp?tab=readme-ov-file#unique-molecular-identifier-umi-processing). -Where: +We therefore have two parallel workflows: -* `sample` = sample id -* `lib` = identifier of library preparation -* `flowcell-index` = identifier of flow cell for the sequencing run -* `lane` = identifier of the lane of the sequencing run +### UMI-aware Deduplication -Read group information will be parsed from `FASTQ` file names according to this: +GATK MarkDuplicates will _automatically_ use UMI aware deduplication provided the UMIs are present on the `RX` tag inside the bam/cram file; this is the case when either `--umi_in_read_header` or `--umi_loc` is specified. The appropriate flag for Sentieon dedup will be set provided one of these two parameters is set. Please note that GATK MarkDuplicates Spark [does not support UMIs](https://gatk.broadinstitute.org/hc/en-us/articles/360037224932-MarkDuplicatesSpark). -* `RGID` = "sample_lib_flowcell_index_lane" -* `RGPL` = "Illumina" -* `PU` = sample -* `RGLB` = lib +### Consensus read generation -Each `FASTQ` file pair gets its own read group (`@RG`) in the resulting `BAM` file in the following way. +Sarek will generate consensus reads using [fgbio](http://fulcrumgenomics.github.io/fgbio/tools/latest/) tools if `--umi_read_structure` is specified. For post-UMI processing depending on the experimental setup, duplicate marking and base quality recalibration can be skipped with [`--skip_tools`]. -* The sample name (`SM`) is derived from the the last component of the path given to `--input`. -That is, you should make sure that that directory has a meaningful name! For example, with `--input=/my/fastqs/sample123`, the sample name will be `sample123`. 
-* The read group id is set to *flowcell.samplename.lane*. -The flowcell id and lane number are auto-detected from the name of the first read in the `FASTQ` file. +Separately, the commercial Sentieon tool can perform consensus building within the `sentieon_dedup` step; this can be enabled by setting `--sentieon_consensus true`. This may be done with or without UMIs (specified via `--umi_loc` and `--umi_len` or via `--umi_in_read_header`). -### --input <VCF> --step annotate +### Limitations and future updates -Input files for Sarek can be specified using the path to a `VCF` file given to the `--input` command only with the annotation step (`--step annotate`). -As `Sarek` will use `bgzip` and `tabix` to compress and index `VCF` files annotated, it expects `VCF` files to be sorted. -Multiple `VCF` files can be specified, using a [glob path](https://docs.oracle.com/javase/tutorial/essential/io/fileOps.html#glob), if enclosed in quotes. -For example: +Recent updates to Samtools have been introduced, which can speed-up performance of fgbio tools used in this workflow. +The current workflow does not handle duplex UMIs (i.e. where opposite strands of a duplex molecule have been tagged with a complementary UMI), please use [nf-core/fastquorum](https://nf-co.re/fastquorum) for this case, as well as the case where the UMIs are present in additional FASTQ files. + +## How to run sarek when no(t all) reference files are in igenomes + +For common genomes, such as GRCh38 and GRCh37, the pipeline is shipped with (almost) all necessary reference files. However, sometimes it is necessary to use custom references for some or all files: + +### No igenomes reference files are used + +If none of your required genome files are in igenomes, `--igenomes_ignore` must be set to ignore any igenomes input and `--genome null`. The `fasta` file is the only required input file and must be provided to run the pipeline. All other possible reference file can be provided in addition. 
For details, see the parameter documentation. + +Minimal example for custom genomes: ```bash ---step annotate --input "results/VariantCalling/*/{HaplotypeCaller,Manta,Mutect2,Strelka,TIDDIT}/*.vcf.gz" +nextflow run nf-core/sarek --genome null --igenomes_ignore --fasta ``` -### Sentieon +### Overwrite specific reference files -Sentieon is a commercial solution to process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency. +If you don't want to use some of the provided reference genomes, they can be overwritten by either providing a new file or setting the respective file parameter to `false`, if it should be ignored: -Please refer to the [nf-core/configs](https://github.com/nf-core/configs#adding-a-new-pipeline-specific-config) repository on how to make a pipeline-specific configuration file based on the [munin-sarek specific configuration file](https://github.com/nf-core/configs/blob/master/conf/pipeline/sarek/munin.config). +Example for using a custom known indels file: -Or ask us on the [nf-core Slack](http://nf-co.re/join/slack) on the following channels: [#sarek](https://nfcore.slack.com/channels/sarek) or [#configs](https://nfcore.slack.com/channels/configs). +```bash +nextflow run nf-core/sarek --known_indels --genome GRCh38.GATK +``` -#### Alignment +Example for not using known indels, but all other provided reference files: -> Sentieon BWA matches BWA-MEM with > 2X speedup. +```bash +nextflow run nf-core/sarek --known_indels false --genome GRCh38.GATK +``` -This tool is enabled by default within `Sarek` if both `--sentieon` and `--step mapping` are specified. +### Where do the used reference genomes originate from + +For GATK.GRCh38 the links for each reference file and the corresponding processes that use them are listed below.
For GATK.GRCh37 the files originate from the same sources: + +| File | Tools | Origin | Docs | +| :-------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------------------------------- | +| ascat_alleles | ASCAT | https://www.dropbox.com/s/uouszfktzgoqfy7/G1000_alleles_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS | +| ascat_loci | ASCAT | https://www.dropbox.com/s/80cq0qgao8l1inj/G1000_loci_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS | +| ascat_loci_gc | ASCAT | https://www.dropbox.com/s/80cq0qgao8l1inj/G1000_loci_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS | +| ascat_loci_rt | ASCAT | https://www.dropbox.com/s/xlp99uneqh6nh6p/RT_G1000_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS | +| bwa | bwa-mem | `bwa index -p bwa/${fasta.baseName} $fasta` | | +| bwamem2 | bwa-mem2 | `bwa-mem2 index -p bwamem2/${fasta} $fasta` | | +| dragmap | DragMap | `dragen-os --build-hash-table true --ht-reference $fasta --output-directory dragmap` | | +| dbsnp | Baserecalibrator, ControlFREEC, GenotypeGVCF, HaplotypeCaller | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle | +| dbsnp_tbi | Baserecalibrator, 
ControlFREEC, GenotypeGVCF, HaplotypeCaller | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | | +| dict | Baserecalibrator(Spark), CNNScoreVariant, EstimateLibraryComplexity, FilterMutectCalls, FilterVariantTranches, GatherPileupSummaries,GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, MarkDuplicates(Spark), MergeVCFs, Mutect2, Variantrecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle | +| fasta | ApplyBQSR(Spark), ApplyVQSR, ASCAT, Baserecalibrator(Spark), BWA, BWAMem2, CNNScoreVariant, CNVKit, ControlFREEC, DragMap, DEEPVariant, EnsemblVEP, EstimateLibraryComplexity, FilterMutectCalls, FilterVariantTranches, FreeBayes, GatherPileupSummaries,GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, indexcov, interval building, Manta, MarkDuplicates(Spark), MergeVCFs, MSIsensor2, MSISensorPro, Mutect2, Samtools, SnpEff, Strelka, Tiddit, Variantrecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle | +| fasta_fai | ApplyBQSR(Spark), ApplyVQSR, ASCAT, Baserecalibrator(Spark), BWA, BWAMem2, CNNScoreVariant, CNVKit, ControlFREEC, DragMap, DEEPVariant, EnsemblVEP, EstimateLibraryComplexity, FilterMutectCalls, FilterVariantTranches, FreeBayes, GatherPileupSummaries,GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, indexcov, interval building, Manta, MarkDuplicates(Spark), MergeVCFs, MSIsensor2, MSISensorPro, Mutect2, Samtools, SnpEff, Strelka, Tiddit, Variantrecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle | +|
germline_resource | GetPileupsummaries,Mutect2 | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | | +| germline_resource_tbi | GetPileupsummaries,Mutect2 | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | | +| intervals | ApplyBQSR(Spark), ASCAT, Baserecalibrator(Spark), BCFTools, CNNScoreVariants, ControlFREEC, Deepvariant, FilterVariantTranches, FreeBayes, GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, Strelka, mpileup, MSIsensor2, MSISensorPro, Mutect2, VCFTools | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | | +| known_indels | BaseRecalibrator(Spark), FilterVariantTranches | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | | +| known_indels_tbi | BaseRecalibrator(Spark), FilterVariantTranches | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | | +| known_snps | BaseRecalibrator(Spark), FilterVariantTranches, VariantRecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | | +| known_snps_tbi | BaseRecalibrator(Spark), FilterVariantTranches, VariantRecalibrator | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | +| mappability | ControlFREEC | http://xfer.curie.fr/get/vyIi4w8EONl/out100m2_hg38.zip | http://boevalab.inf.ethz.ch/FREEC/tutorial.html | +| pon | Mutect2 | [GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON- | +| pon_tbi | Mutect2 | 
[GATKBundle](https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/) | https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON- | + +## What happened with snpeff db 105 + +At the time of writing, the SnpEff db 105 is not available to download from the SnpEff website, or to use with snpeff 5.3a, even with an already downloaded cache. +If you wish to continue using cache 105, we would recommend to overwrite with a custom config the container for the snpeff process and use a prior version of the tool. +ie: -#### Germline SNV/INDEL Variant Calling - DNAseq +```nextflow +withName: SNPEFF_SNPEFF { + container = 'quay.io/biocontainers/snpeff:5.1--hdfd78af_2' +} +``` -> Precision FDA award-winning software. -> Matches GATK 3.3-4.1, and without down-sampling. -> Results up to 10x faster and 100% consistent every time. +Please note that if you do so, the download is not working anymore. -This tool is enabled within `Sarek` if both `--sentieon` and `--tools DNAseq` are specified. +## How to customise SnpEff and VEP annotation -#### Germline SNV/INDEL Variant Calling - DNAscope +SNPeff and VEP both require a large resource of files known as a cache. +These are folders composed of multiple gigabytes of files which need to be available for the software to properly function. +To use these, supply the parameters `--vep_cache` and/or `--snpeff_cache` with the locations to the root of the annotation cache folder for each tool. -> Improved accuracy and genome characterization. -> Machine learning enhanced filtering producing top variant calling accuracy. +### Specify the cache location -This tool is enabled within `Sarek` if both `--sentieon` and `--tools DNAscope` are specified. +Params `--snpeff_cache` and `--vep_cache` are used to specify the locations to the root of the annotation cache folder. 
+The cache will be located within a subfolder with the path `${snpeff_species}.${snpeff_version}` for SnpEff and `${vep_species}/${vep_cache_version}_${vep_genome}` for VEP. +If this directory is missing, Sarek will raise an error. -#### Somatic SNV/INDEL Variant Calling - TNscope +For example this is a typical folder structure for `GRCh38` and `WBCel235`, with SNPeff cache version 99 and VEP cache version 110: -> Winner of ICGC-TCGA DREAM challenge. -> Improved accuracy, machine learning enhanced filtering. -> Supports molecular barcodes and unique molecular identifiers. +```text +/data/ +├─ snpeff_cache/ +│ ├─ GRCh38.99/ +│ ├─ WBcel235.99/ +├─ vep_cache/ +│ ├─ caenorhabditis_elegans/ +│ │ ├─ 110_WBCel235/ +│ ├─ homo_sapiens/ +│ │ ├─ 110_GRCh38/ +``` -This tool is enabled within `Sarek` if both `--sentieon` and `--tools TNscope` are specified. +For this example, the parameters `--snpeff_cache /data/snpeff_cache` and `--vep_cache /data/vep_cache` would be used. +Both SnpEff and VEP will figure out internally the path towards the specific cache version / species the annotation should be performed given the parameters specified to Sarek. -#### Structural Variant Calling +### Change cache version and species -> Germline and somatic SV calling, including translocations, inversions, duplications and large INDELs +By default all is specified in the [igenomes.config](https://github.com/nf-core/sarek/blob/master/conf/igenomes.config) file. +Explanation can be found for all params in the documentation: -This tool is enabled within `Sarek` if both `--sentieon` and `--tools DNAscope` are specified. 
+- [snpeff_db](https://nf-co.re/sarek/parameters#snpeff_db) +- [vep_genome](https://nf-co.re/sarek/parameters#vep_genome) +- [vep_species](https://nf-co.re/sarek/parameters#vep_species) +- [vep_cache_version](https://nf-co.re/sarek/parameters#vep_cache_version) -### Containers +With the previous example of `GRCh38`, these are the values that were used for these params: -With `Nextflow DSL2`, each process use its own `Conda` environment or container from `biocontainers`. +```bash +snpeff_db = 'GRCh38.99' +vep_cache_version = '110' +vep_genome = 'GRCh38' +vep_species = 'homo_sapiens' +``` -For annotation, cache has to be downloaded, or specifically designed containers are available with cache. +### Usage recommendation with AWS iGenomes -`sareksnpeff`, our `snpeff` container is designed using [Conda](https://conda.io/). +The cache for each of these annotation tools has its own structure and is frequently updated, therefore it is kept separate from AWS iGenomes. It is not recommended to put any cache for each of this annotation tools in your local AWS iGenomes folder. -[![sareksnpeff-docker status](https://img.shields.io/docker/automated/nfcore/sareksnpeff.svg)](https://hub.docker.com/r/nfcore/sareksnpeff) +A classical organisation on a shared storage area might be: -Based on [nfcore/base:1.12.1](https://hub.docker.com/r/nfcore/base/tags), it contains: +```bash +/data/igenomes/ +/data/cache/snpeff_cache +/data/cache/vep_cache +``` -* **[snpEff](http://snpeff.sourceforge.net/)** 4.3.1t -* Cache for `GRCh37`, `GRCh38`, `GRCm38`, `CanFam3.1` or `WBcel235` +Which can then be used this way in Sarek: -`sarekvep`, our `vep` container is designed using [Conda](https://conda.io/). +```bash +nextflow run nf-core/sarek \ + --igenomes_base /data/igenomes/ \ + --snpeff_cache /data/cache/snpeff_cache/ \ + --vep_cache /data/cache/vep_cache/ \ + ... 
+``` -[![sarekvep-docker status](https://img.shields.io/docker/automated/nfcore/sarekvep.svg)](https://hub.docker.com/r/nfcore/sarekvep) +Alternatively the data may be stored on AWS S3 storage, therefore the parameters might be: -Based on [nfcore/base:1.12.1](https://hub.docker.com/r/nfcore/base/tags), it contains: +```bash +s3://my-reference-data/igenomes/ +s3://my-reference-data/cache/snpeff_cache/ +s3://my-reference-data/cache/vep_cache/ +``` -* **[GeneSplicer](https://ccb.jhu.edu/software/genesplicer/)** 1.0 -* **[VEP](https://github.com/Ensembl/ensembl-vep)** 99.2 -* Cache for `GRCh37`, `GRCh38`, `GRCm38`, `CanFam3.1` or `WBcel235` +Which can then be used this way in Sarek: -### Using downloaded cache +```bash +nextflow run nf-core/sarek \ + --igenomes_base s3://my-reference-data/igenomes/ \ + --snpeff_cache s3://my-reference-data/cache/snpeff/ \ + --vep_cache s3://my-reference-data/cache/ensemblvep/ \ + ... +``` -Both `snpEff` and `VEP` enable usage of cache. -If cache is available on the machine where `Sarek` is run, it is possible to run annotation using cache. -You need to specify the cache directory using `--snpeff_cache` and `--vep_cache` in the command lines or within configuration files. -The cache will only be used when `--annotation_cache` and cache directories are specified (either in command lines or in a configuration file). +These params can be specified in a config file or in a profile using the params scope, or even in a json or a yaml file using the `-params-file` nextflow option. -Example: +Note: we recommend storing each annotation cache in a separate directory so each cache version is handled differently. +This may mean you have many similar directories but will dramatically reduce the storage burden on machines running the SnpEff or VEP process. 
+ +### Use annotation-cache for SnpEff and VEP + +[Annotation-cache](https://annotation-cache.github.io) is an open AWS registry resource that stores a mirror of some cache files on AWS S3 which can be used with Sarek. +It contains some genome builds which can be found by checking the contents of the S3 bucket. + +SNPeff and VEP cache are stored at the following location on S3: ```bash -nextflow run nf-core/sarek --tools snpEff --step annotate --sample --snpeff_cache --annotation_cache -nextflow run nf-core/sarek --tools VEP --step annotate --sample --vep_cache --annotation_cache +snpeff_cache = s3://annotation-cache/snpeff_cache/ +vep_cache = s3://annotation-cache/vep_cache/ ``` -### Spark related issues - -If you have problems running processes that make use of Spark such as ```MarkDuplicates```. -You are probably experiencing issues with the limit of open files in your system. -You can check your current limit by typing the following: +The contents of said cache can be listed with the following command using the S3 CLI: ```bash -ulimit -n +aws s3 --no-sign-request ls s3://annotation-cache/snpeff_cache +aws s3 --no-sign-request ls s3://annotation-cache/vep_cache/ ``` -The default limit size is usually 1024 which is quite low to run Spark jobs. -In order to increase the size limit permanently you can: +Since both Snpeff and VEP are internally figuring the path towards the specific cache version / species, `annotation-cache` is using an extra set of keys to specify the species and genome build. + +Which is handled internally by Sarek. -Edit the file ```/etc/security/limits.conf``` and add the lines: +Please refer to the [annotation-cache documentation](https://annotation-cache.github.io) for more details. + +### Use Sarek to download cache and annotate in one go + +Both VEP and snpEff come with built-in download functionality to download the cache prior to use. +Sarek includes these as optional processes. 
+Use the parameter `--download_cache` and specify the tool with `--tools`, and Sarek will download the relevant cache (`snpeff` and/or `vep`) using their respective download functions. +It is recommended to save the cache somewhere highly accessible for subsequent runs of Sarek, so the cache does not have to be re-downloaded. + +Sarek will automatically download the cache using each tool (SnpEff and/or VEP) to your work directory. +And subsequently perform the annotation of VCF files specified as an input in a samplesheet or produced by Sarek. + +### Only download cache + +Using the parameter `--build_only_index` allows downloading only the cache for the specified tools. + +### Location for the cache + +Cache can be downloaded in the specified `--outdir_cache` location. +Else, it will be downloaded in `cache/` in the specified `--outdir` location. + +This command could be used to download the cache for both tools in the specified `--outdir_cache` location: ```bash -* soft nofile 65535 -* hard nofile 65535 +nextflow run nf-core/sarek --outdir results --outdir_cache /path_to/my-own-cache --tools vep,snpeff --download_cache --build_only_index --input false ``` -Edit the file ```/etc/security/limits.conf``` and add the lines: +This command could be used to point to the recently downloaded cache and run SnpEff and VEP: ```bash -fs.file-max = 65535 +nextflow run nf-core/sarek --outdir results --vep_cache /path_to/my-own-cache/vep_cache --snpeff_cache /path_to/my-own-cache/snpeff_cache --tools vep,snpeff --input samplesheet_vcf.csv ``` -Edit the file ```/etc/sysconfig/docker``` and add the new limits to OPTIONS like this: +Here is an example of how Sarek may be used to download the SnpEff cache for Candida auris: ```bash -OPTIONS=”—default-ulimit nofile=65535:65535" +nextflow run nf-core/sarek --outdir results --outdir_cache /path_to/my-own-cache --tools snpeff --download_cache --build_only_index --input false --snpeff_db _candida_auris_gca_001189475 --step annotate --genome null
--igenomes_ignore ``` -Re-start your session. +### Create containers with pre-downloaded cache -Note that the way to increase the open file limit in your system may be slightly different or require additional steps. +nf-core is no longer maintaining containers with pre-downloaded cache. Hosting the cache within the container is not recommended as it can cause a number of problems. Instead we recommend using an external cache. The following is left for legacy reasons. -### Download cache +But for each of these tools, a helper script `build.sh` can be found at the root of the tool folder in the nf-core module repo ([snpeff](https://github.com/nf-core/modules/tree/master/modules/nf-core/snpeff) and [ensemblvep](https://github.com/nf-core/modules/tree/master/modules/nf-core/ensemblvep)), and can be adapted for your usage. -A `Nextflow` helper script has been designed to help downloading `snpEff` and `VEP` caches. -Such files are meant to be shared between multiple users, so this script is mainly meant for people administrating servers, clusters and advanced users. +Overwriting the container declaration is then possible to accommodate the new container. -```bash -nextflow run download_cache.nf --snpeff_cache --snpeff_db --genome -nextflow run download_cache.nf --vep_cache --species --vep_cache_version --genome +## How to use SnpSift annotation + +SnpSift annotates VCF files with custom annotation databases (e.g., dbSNP, gnomAD, ClinVar). To enable, add `snpsift` to `--tools` and provide a CSV samplesheet via `--snpsift_databases`. + +### Database samplesheet + +The CSV samplesheet specifies annotation databases.
See `assets/snpsift_databases_example.csv`: + +```csv +vcf,tbi,fields,prefix,vardb +/data/dbsnp.vcf.gz,,RS;COMMON,dbSNP_, +/data/gnomad.vcf.gz,,AF;AC;AN,gnomAD_,/data/gnomad.vcf.gz.snpsift.vardb ``` -### Using VEP CADD plugin +| Column | Required | Description | +| -------- | ----------------------------- | ---------------------------------------------------------- | +| `vcf` | Yes | Path to annotation VCF file | +| `tbi` | No | Path to tabix index (defaults to `${vcf}.tbi`) | +| `fields` | Yes (if `vardb` not provided) | Semicolon-separated INFO fields to extract | +| `prefix` | No | Prefix for annotated field names (avoids naming conflicts) | +| `vardb` | No | Path to pre-built `.snpsift.vardb` directory | -To enable the use of the `VEP` `CADD` plugin: +### Database creation -* Download the `CADD` files -* Specify them (either on the command line, like in the example or in a configuration file) -* use the `--cadd_cache` flag +SnpSift uses optimized `.snpsift.vardb` databases. If the `vardb` column is empty, databases are built automatically. For large databases, pre-build them and specify the path in the `vardb` column to speed up subsequent runs. -Example: +### Using VEP plugins + +#### dbnsfp + +Enable with `--vep_dbnsfp`. The following parameters are mandatory: + +- `--dbnsfp`, to specify the path to the dbNSFP processed file. +- `--dbnsfp_tbi`, to specify the path to the dbNSFP tabix indexed file. + +The following parameters are optional: + +- `--dbnsfp_consequence`, to filter/limit outputs to a specific effect of the variant. + - The set of consequence terms is defined by the Sequence Ontology and an overview of those used in VEP can be found [here](https://www.ensembl.org/info/genome/variation/prediction/predicted_data.html). + - If one wants to filter using several consequences, then separate those by using '&' (i.e. `--dbnsfp_consequence '3_prime_UTR_variant&intron_variant'`). +- `--dbnsfp_fields`, to retrieve individual values from the dbNSFP file.
+ - The values correspond to the name of the columns in the dbNSFP file and are separated by comma. + - The column names might differ between the different dbNSFP versions. Please check the Readme.txt file, which is provided with the dbNSFP file, to obtain the correct column names. The Readme file contains also a short description of the provided values and the version of the tools used to generate them. + +For more details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#dbnsfp). + +#### Condel + +Enable with `--vep_condel`. The following parameters are mandatory: + +- `--condel_config`, to specify the path to the Condel config directory containing cutoffs and distribution files. + +The plugin calculates the Consensus Deleteriousness score for missense mutations using SIFT and PolyPhen-2 predictions from the Ensembl API. + +For more details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#Condel). + +#### LOFTEE + +Enable with `--vep_loftee`. + +For more details, see [here](https://github.com/konradjk/loftee). + +#### Mastermind + +Enable with `--vep_mastermind`. The following parameters are mandatory: + +- `--mastermind_file`, to specify the path to the Mastermind cited variants VCF file (must be bgzipped and tabix indexed). + +The following parameters are optional: + +- `--mastermind_mutations`, set to `true` to return citations for all mutations/transcripts (default: `false`). +- `--mastermind_var_iden`, set to `true` to return only Mastermind variant identifiers as gene:key format (default: `false`). +- `--mastermind_url`, set to `true` to return the built Mastermind URL (default: `false`). + +For more details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#Mastermind). + +#### Phenotypes + +Enable with `--vep_phenotypes`. The following parameters are optional: + +- `--phenotypes_file`, to specify the path to the phenotype annotation GFF/GVF file. 
If not specified, the plugin will automatically download phenotype data on first run. +- `--phenotypes_file_tbi`, to specify the path to the phenotype annotation tabix indexed file. Required when using a gzipped `--phenotypes_file`. +- `--phenotypes_include_types`, &-separated list of feature types to include (e.g., 'Gene&Variation'). Options: Gene, Variation, QTL, StructuralVariation, SupportingStructuralVariation, RegulatoryFeature. + +The plugin retrieves overlapping phenotype information from Ensembl's phenotype annotation databases, mapping phenotype data to genomic features including genes, variants, and QTLs. + +For more details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#Phenotypes). + +#### SpliceAi + +Enable with `--vep_spliceai`. The following parameters are mandatory: + +- `--spliceai_snv`, to specify the path to SpliceAI raw scores snv file. +- `--spliceai_snv_tbi`, to specify the path to SpliceAI raw scores snv tabix indexed file. +- `--spliceai_indel`, to specify the path to SpliceAI raw scores indel file. +- `--spliceai_indel_tbi`, to specify the path to SpliceAI raw scores indel tabix indexed file. + +For more details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#spliceai). + +#### SpliceRegions + +Enable with `--vep_spliceregion`. + +For more details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#spliceregion) and [here](https://www.ensembl.info/2018/10/26/cool-stuff-the-vep-can-do-splice-site-variant-annotation/). + +### BCFTOOLS Annotate + +It is possible to annotate a VCF file with a custom annotation file using [BCFTOOLS Annotate](https://samtools.github.io/bcftools/bcftools.html#annotate). This can be done by adding `bcfann` to the tools list.
The following parameters are mandatory: + +- `--bcftools_annotations`, path to vcf annotation file +- `--bcftools_annotations_tbi`, path to vcf annotation index file +- `--bcftools_header_lines`, path to vcf annotation header lines file + +The following parameters are optional: + +- `--bcftools_columns`, path to vcf annotation columns file + +## MultiQC related issues + +### Plots for SnpEff are missing + +When plots are missing, it is possible that the fasta and the custom SnpEff database are not matching https://pcingola.github.io/SnpEff/se_faq/#error_chromosome_not_found-details. +SnpEff completes without throwing an error, so Nextflow also completes successfully. An indication of the error is the presence of these lines in the `.command` files: + +```text +ERRORS: Some errors were detected +Error type Number of errors +ERROR_CHROMOSOME_NOT_FOUND 17522411 +``` + +## Sentieon + +[Sentieon](https://www.sentieon.com/) is a commercial solution to process genomics data with high computing efficiency, fast turnaround time, exceptionally high accuracy, and 100% consistency. + +In particular, Sentieon contains what may be viewed as sped-up versions of some standard GATK tools, like bwamem and haplotyper. Sarek contains support for some of the functions in Sentieon. In order to use those functions, the user will need to supply Sarek with a license for Sentieon. + +### Setup of Sentieon license + +Sentieon supplies licenses in the form of a string-value (a url) or a file. It should be base64-encoded and stored in a nextflow secret named `SENTIEON_LICENSE_BASE64`.
If a license string (url) is supplied, then the nextflow secret should be set like this: ```bash -nextflow run nf-core/sarek --step annotate --tools VEP --sample --cadd_cache \ - --cadd_indels \ - --cadd_indels_tbi \ - --cadd_wg_snvs \ - --cadd_wg_snvs_tbi +nextflow secrets set SENTIEON_LICENSE_BASE64 $(echo -n <sentieon_license_string> | base64 -w 0) ``` -### Downloading CADD files +:::note +<sentieon_license_string> is formatted as `IP:Port`, for example: `12.12.12.12:8990` +::: -An helper script has been designed to help downloading `CADD` files. -Such files are meant to be share between multiple users, so this script is mainly meant for people administrating servers, clusters and advanced users. +If a license file is supplied, then the nextflow secret should be set like this: ```bash -nextflow run download_cache.nf --cadd_cache --cadd_version --genome +nextflow secrets set SENTIEON_LICENSE_BASE64 $(cat <sentieon_license_file.lic> | base64 -w 0) ``` + +:::note +If you're looking for documentation on how the nf-core Sentieon GitHub Actions and Sentieon License Server are set up: [Here be dragons.](https://github.com/nf-core/ops/blob/main/pulumi/sentieon_license_server/README.md) +::: + +### Available Sentieon functions + +Sarek contains the following Sentieon functions from [DnaSeq](https://support.sentieon.com/manual/DNAseq_usage/dnaseq/) : [bwa mem](https://support.sentieon.com/manual/usages/general/#bwa-mem-syntax), [LocusCollector](https://support.sentieon.com/manual/usages/general/#locuscollector-algorithm) + [Dedup](https://support.sentieon.com/manual/usages/general/#dedup-algorithm), [Haplotyper](https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm), [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcftyper-algorithm) and [VarCal](https://support.sentieon.com/manual/usages/general/#varcal-algorithm) + [ApplyVarCal](https://support.sentieon.com/manual/usages/general/#applyvarcal-algorithm), so the basic processing of alignment of fastq-files to VCF-files can be done using sped-up Sentieon functions.
+ +Sarek also contains the Sentieon functions [DnaScope](https://support.sentieon.com/manual/usages/general/?highlight=dnamodelapply#dnascope-algorithm), [DNAModelApply](https://support.sentieon.com/manual/usages/general/?highlight=dnamodelapply#dnamodelapply-algorithm) and [TNScope](https://support.sentieon.com/manual/usages/general/#tnscope-algorithm). + +### Basic usage of Sentieon functions + +To use Sentieon's aligner `bwa mem`, set the aligner option `sentieon-bwamem`. +(This can, for example, be done by adding `--aligner sentieon-bwamem` to the `nextflow run` command.) + +To use Sentieon's function `Dedup`, specify `sentieon_dedup` as one of the tools. +(This can, for example, be done by adding `--tools sentieon_dedup` to the `nextflow run` command.) + +To use Sentieon's function `DNAscope`, specify `sentieon_dnascope` as one of the tools. +This can, for example, be done by adding `--tools sentieon_dnascope` to the `nextflow run` command. +In order to skip Sentieon's variant-filter `DNAModelApply`, one may add `--skip_tools dnascope_filter` to the `nextflow run` command. +Sarek also provides the option `sentieon_dnascope_emit_mode` which can be used to set the [emit-mode](https://support.sentieon.com/manual/usages/general/#dnascope-algorithm) of Sentieon's dnascope. +Sentieon's dnascope can output both a vcf-file and a gvcf-file in the same run; this is achieved by setting `sentieon_dnascope_emit_mode` to `<base_emit_mode>,gvcf`, where `<base_emit_mode>` is `variant`, `confident` or `all`. + +Sentieon's function `Haplotyper` is used in much the same way as `DNAscope`. +To use Sentieon's function `Haplotyper`, specify `sentieon_haplotyper` as one of the tools. +This can, for example, be done by adding `--tools sentieon_haplotyper` to the `nextflow run` command. +In order to skip the GATK-based variant-filter, one may add `--skip_tools haplotyper_filter` to the `nextflow run` command.
+Sarek also provides the option `sentieon_haplotyper_emit_mode` which can be used to set the [emit-mode](https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm) of Sentieon's haplotyper. +Sentieon's haplotyper can output both a vcf-file and a gvcf-file in the same run; this is achieved by setting `sentieon_haplotyper_emit_mode` to `<base_emit_mode>,gvcf`, where `<base_emit_mode>` is `variant`, `confident` or `all`. + +To use Sentieon's function `GVCFtyper` along with Sentieon's version of VQSR (`VarCal` and `ApplyVarCal`) for joint-germline genotyping, specify `sentieon_haplotyper` as one of the tools, set the option `sentieon_haplotyper_emit_mode` to `gvcf`, and add the option `joint_germline`. +This can, for example, be done by adding `--tools sentieon_haplotyper --joint_germline --sentieon_haplotyper_emit_mode gvcf` to the `nextflow run` command. +If `sentieon_dnascope` is chosen instead of `sentieon_haplotyper`, then Sentieon's version of VQSR is skipped, as recommended by Sentieon. + +Sentieon's function `TNscope` can also be used by adding `--tools sentieon_tnscope` to the `nextflow run` command. + +### Joint germline variant calling + +Sentieon's [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcftyper-algorithm) does not support the [GenomicsDB](https://gatk.broadinstitute.org/hc/en-us/articles/5358869876891-GenomicsDBImport) datastore format. This means that, in contrast to the GATK based joint germline variant calling subworkflow in Sarek, the Sentieon/DNAseq based joint germline variant calling subworkflow does not use the GenomicsDB datastore format. + +### QualCal (BQSR) + +Currently, Sentieon's version of BQSR, QualCal, is not available in Sarek. Recent Illumina sequencers tend to provide well-calibrated BQs, so BQSR may not provide much benefit. By default Sarek runs GATK's BQSR; that can be skipped by adding the option `--skip_tools baserecalibrator`.
+ +## Requested resources for the tools + +Resource requests are difficult to generalize and are often dependent on input data size. Currently, the default CPU and memory requests were adapted from tests on 5 ICGC paired whole-genome sequencing samples with approximately 40X and 80X depth. +For targeted data analysis, this is overshooting by a lot. In this case resources for each process can be limited by tailoring the request by process name as described [here](#resource-requests). If you are using Sarek for a certain data type regularly, and would like to make these requests available to others on your system, an institution-specific, pipeline-specific config file can be added [here](https://github.com/nf-core/configs/tree/master/conf/pipeline/sarek). + +## CNV calling with CNVkit + +The CNV calling in Sarek implements the approach proposed by [CNVkit](https://cnvkit.readthedocs.io/en/stable/). +It is possible to call CNVs with whole-genome or targeted capture data (exome and amplicons): depending on the sequencing approach, Sarek applies different [settings](https://cnvkit.readthedocs.io/en/stable/nonhybrid.html) as recommended by CNVkit. + +### Reference background + +Given the nature of this type of CNV calling algorithm, which relies on the detection of variations in the coverage profile, the definition of a background reference in control data is known to improve the calling in targeted and hybrid capture applications. This is to ensure accurate profiling, especially in the off-target regions. +We recommend creating a background reference with the nf-core pipeline [createpanelrefs](https://nf-co.re/createpanelrefs).
+ +:warning: When creating a coverage reference, pay particular attention to the following: + +- the control samples should be processed with the same targeted capture and sequencing technology +- if BAM files are used to compute the background, they should have been processed with the same pipeline used to call the CNVs + +### Germline calling + +Sarek implements the [recommended germline settings](https://cnvkit.readthedocs.io/en/stable/germline.html), i.e. applying the `--filter ci` option in the CNVkit call step. +However, this is defined at a config level by adding this option to the `ext.args`: the user can therefore choose a different approach by changing the arguments in a custom config. + +### Somatic calling + +The [available options](https://cnvkit.readthedocs.io/en/stable/tumor.html) a user can choose from for tumour analysis depend very much on the specific design being analysed. Sarek therefore doesn't implement any of these choices, i.e. it runs the CNVkit call step with default settings. +We encourage the user to verify whether particular settings might be more appropriate for their data.
diff --git a/docs/usage/variantcalling/img/bqsr.excalidraw.svg b/docs/usage/variantcalling/img/bqsr.excalidraw.svg new file mode 100644 index 0000000000..ffc9dc58db --- /dev/null +++ b/docs/usage/variantcalling/img/bqsr.excalidraw.svg @@ -0,0 +1,17 @@ + + + eyJ2ZXJzaW9uIjoiMSIsImVuY29kaW5nIjoiYnN0cmluZyIsImNvbXByZXNzZWQiOnRydWUsImVuY29kZWQiOiJ4nO1daXPiSrL9fn+Fo9/XQbeqstaJeDGBN7CNd1x1MDAxNptcdTAwMTdcdTAwMTNcdTAwMGVcdTAwMTZhMKtZbMzE/e8vi7aNXGaIxVxiXHUwMDEwd0xHuLtBWCUpT548WVlZ//ljb+9X963l/vrn3i+3X8jVKsV27vXXP+z7L267U2k28CM2/H+n2WtcdTAwMTeGR5a73Vbnn3/+Wc+1q263VctcdTAwMTVcXOel0unlap1ur1hpOoVm/c9K1613/mV/XuTq7v+2mvVit+2MTlx1MDAxMnGLlW6z/ftcXG7NrbuNblx1MDAwN3/7/+H/9/b+M/zpXHUwMDE5XdstdHONx5o7/MLwI89cdTAwMDDBjL970WxcZlx1MDAwN0ulXHUwMDA2SjhcdTAwMDXxeUSlc4jn67pF/LiEY3ZHn9i3fjHdXHUwMDE0pzcq0X+4T3b3eb6duY/HRqctVWq12+5b7fetyFx1MDAxNcq9tmdQnW67WXUzlWK3bM8+9v7n9zpNvFx1MDAwYqNvtZu9x3LD7XS+fKfZylx1MDAxNSrdN/tcdTAwMWUhn+/+vlx1MDAwYv/cXHUwMDFivdPH/1x1MDAwMTBHKFx1MDAwMpSNLnT4TSlcdTAwMWOtxNg4XHUwMDBlmjW89TiO/6Gu/TNcdTAwMWFJPleoPuJwXHUwMDFhxdExpVLBXHUwMDE0zOiY1/erY+rzrbJbeSx38T3uuWR3eIulVkZcdFxun+/bX986KVx1MDAwZVx1MDAxZva/R7e1jWZyYr/Q6NVq3jvTKL7fmVx1MDAwZqNcdTAwMTiZXHUwMDA1vL/z12j89vijcXPymtRcdTAwMTez6rr97ueFeWwg2y9D8Spuolx1MDAwM/f5NFx1MDAxNT1p7KfL6tfncX+9/2s0/F6rmPttUlRcdTAwMWElqVx1MDAxMUKi7X1+Xqs0quPXVmtcdTAwMTaqIyv8w3MhXHUwMDEz5v9lnFx1MDAxZcunXGZ8LVx1MDAxZowyTFOpXHUwMDE2tvzpV1x1MDAxZHrLN46gaPnccMqJXHUwMDFjXHUwMDA3gIJVXHUwMDAx0G3nXHUwMDFhnVaujSY1XHRcdTAwMDJcblx1MDAwZaXGXHUwMDE4Qami3lx1MDAxM31CwjOgd0gwXCJcdTAwMTRcdTAwMDVmQK1cdTAwMGWKL1x1MDAxZkxY/1xmXHUwMDAz5Vx1MDAxY1x1MDAxNFnGQEejaja6t5WBO7ySL+9cdTAwMWXn6pXa25dHObRcXLyJ0V9f3orWKo/WgH9cdTAwMTVwqG77i213K0hcdTAwMTKfXHUwMDA31CvFotftXHUwMDE38Dy5SsNtnyzirZvtymOlkaslJ4eBV+zGP1x1MDAxZVx1MDAxMnU8jymf67j2U/u+nonL2bSk1Pi7I3BcdTAwMTJcdTAwMDKMXHQygu88cJ5n1eAulii81XJFJlq1WvnuIFx1MDAxYnpwXCL9SMFcdTAwMDBtbZyXpIPI1V8++Fx1MDAwZTnlWYnl898lJ1xynGnieX9cdTAwMTfI6Tj9QNv64u4sm7spvFx1MDAxZaVuXHUwMDFljq7d5chJXHUwMDEz9FhrJyfJfO1fMSqogCXCsulXXHUwMDFkdvtXXHUwMDE06Y
GBIFxcgpaKfVx1MDAwNYHigYBgNkFJhyhjtFx1MDAxNIoxKbz8OIOiKEaSmiCzXHUwMDA1XHUwMDEwt32bovCuXHS2OYpKromi5njucYpKbpCiOPVXTohQjGeEXHUwMDE53ah5XHUwMDEw3Y81j1pcdTAwMWR4Ulx1MDAwMorxzlu+lzrfT4RcdTAwMWSi3GjnS+D2IZvQXHUwMDAyv0Z0m9dOnEngUpnd4qc7mtaRezeSP2imT/drl1x1MDAwNXmSqSzHT4qQpYD/LX5cdTAwMDLiK55cdTAwMTjajZEon8zCxj/9qkNu/IJcYocyJVxi437iKVxiXHUwMDE4XHUwMDA0rqAwflx1MDAxNozg8MTqyPi+gmJcdTAwMWNV/s4rqDlee5tcbopcdTAwMTP/XGJcdTAwMTLpiSst5OJcYs3V6vVMXlx1MDAwZVx1MDAxMrSffL6KPldl8fw17FxixfDQMZRQMYFNpCg+/sHmXHUwMDE1XHUwMDE0Plx1MDAwNlx1MDAwNpKQXHUwMDAwUlx1MDAxOVx1MDAxYqSoa2ViXHRyxKB8v3/Qfa2blHt/XHUwMDEzQlx0xbR/XG6BXHUwMDFiYDhcdTAwMDa2OFx1MDAwMKZfdchcdTAwMDHAXHSKJEVQTJqpXHUwMDEyXG5cdTAwMDJBwToklGCCU0Y8k1x1MDAxM9uQUFx1MDAxOD4uY6bhlFBzXFz3diWUL0cxTVx0SFx1MDAxMIsjNHpcdTAwMTCN9+LnzfjD4PW0d3liXHUwMDEy6kiGXHUwMDFloZI7hCM8p6koyVfP8ZVKbsF8W0RRjdG8XHUwMDAxYnaLokyqdNqJkqwpkofLwsNL92L/xSxHUUYzSbxIWFx1MDAwYkVcdTAwMTlfilJaXHUwMDAxum65eJJv+kWH3v6NTXKjXsRAXGJcdTAwMDT1aIJcdTAwMGaKXG5cdTAwMDBcdTAwMDWzXHUwMDE5Sjhcblx1MDAxOcpo/DlcdTAwMDbEXHUwMDE5XGaF5GCMlDqA9ML3XHSKXCK1siWMdDWCiq2JoOY47nGCim2QoICT8Xc/XHUwMDAwKlx1MDAxNEHZb/Tis1Bnrlx1MDAxONxcZpKtWOauk9tcdTAwMWaI46tidD/0XHUwMDAwtcVcdTAwMTFoalOqI6QzXHUwMDFl0X2HoHKiqEulb1x1MDAxM1x1MDAxNFx1MDAxOFx1MDAwMG2k2q1cdTAwMWGJXHUwMDAyjVx1MDAwZjrJ6PUgdtG8a1x1MDAxNc5fdfWos6SGMtRzi9ZFUJ6HO2b/XFxcdTAwMWKCXHUwMDEwWLxCYvo1h978jSNQXHUwMDA1gERcdTAwMGUy2oP2j0moXHUwMDAwMDCbnpijrYAyknFiZ4P1JC6mXGIoxVx1MDAwMC1Fku1cbigtzebmoFx1MDAwZdbET3P89jg/XHUwMDFkbJCfuPLFJ1x1MDAxM1RrotXi/HRcZk9cdTAwMTeFg8fru/ZAXHUwMDE03GI8fXuVb4ZcdTAwMWSgXHUwMDAyiKOIX/VcdTAwMWVdkZxWVE9SKUEl2TFy4vePtergNZXWT49gjlx1MDAwN/00f6qFUD2B9FxyzijGZVx1MDAwNL2fWnxcdTAwMDJ2+lWH3vqlXHUwMDAz1Fq/XHUwMDFlyqdJ9bQqXHUwMDA01iCdUDRRkHSrXHUwMDE1fIJcdTAwMDKly1hoOKXTXHUwMDFjl71N6SSILzVcdTAwMTlU/ELRJaojKoVa863CUpHW/v15rFx1MDAxN+/H+8/R0IPTaGd8XHUwMDAy+LN+j1x1MDAxMLUyO61YvyekoFxmXHUwMDA3uFPkVFf3hcOj64O+ubmOt6Rrzs1dKoSzT2D8ycn6TIE+e3H7n37VIbd/SbhDlLKT/dSngC9cdTAwMDBcdTAwMTSsYfaJcaYwppTbze0pafjGXGJqXZNPc1x1MDAxY/dWJ5880nRi5Vx1MDAxM0YnNrG0ePb90b
ylajelu8YzPbt8ZbFz3rhcdH39npDc4WpcZlx1MDAxOFx1MDAxZlx1MDAxNLVy/d5qmT27XCJLci52i55cdTAwMWWOs/3Dx1o/NThcdTAwMWM8PVx1MDAxNY5Zj1x1MDAxY/ZDmNhcdTAwMDPtW78nmFx1MDAwNq5cYl08szf9osNu+4o6XHUwMDAymJTUL7O33sq9b6X1NMfghfMgXHUwMDEyXG4rMJPgS4n71ZhpXVm9OVx1MDAwZXubWT3hiTwmmFx0pNSGicWZSSQ7hVx1MDAxN1x1MDAxYcmlyvck91CusHg8nVx0OzolUIeC9Fx1MDAxMU9i9cq9XHUwMDE1M3tcdTAwMDKQnjBC2C12MqTNuFx1MDAxNq+ty1xuTd6ddPOJ5OVLXGIze5xLX1x1MDAwMGjQSlx1MDAwMZWLi6fpV1x1MDAxZHpcdTAwMDBIR3FcdTAwMDTAe2HEeGaPXHUwMDA3gYI1JPcoYVxuwFx1MDAxOLXV7Fx1MDAxZSM44o1R1Lqye3M895qze8OjpoCTXHUwMDFhX3AyyvHW21U3XHUwMDBigzNVauXKhdvDSDVD44/VZidJT0XowYkkZFxmmyQnLVx1MDAxY7rmciWhXHUwMDFjxagxwI2dgFdT4kakTo2yXHUwMDBlnZ7Ch8GYR+h+xJFcbqNILWhcdTAwMDCzw5P0xebTly92XHUwMDE5QY9H1DL6x3Onc+3ufqVRrDRcdTAwMWXHv+I2ij6f1HKd7kGzXq90cVx1MDAxOFfNSqM7fsTw90bb7eZr2c1N3Fx1MDAwYvzNvp+17K/7ytejf+2NjGr4n89///tcdTAwMWZTj/Z98PY1+chHv+5cdTAwMGbv30ujnfnnMalRWlx1MDAxYrXEQuTzhGy0aEFV04c0X0KTaJ89lsOOdsqkQ7TRwnCGt1x1MDAxN8alolx1MDAwNkeC4lZcdTAwMTlcdTAwMTlOgfGxkVx1MDAwNUjJRk5Odn/CnjjoeL/w9MeEXHUwMDFiXHUwMDA2TJqvKdO/XG7a8X5cdTAwMTnF+bfSnH9ztE971MMvfjzkJfE9W3D6lzlqglpcdTAwMTOALVx1MDAxZW7HXHUwMDFln5/LvTNVT5zc9rNcdTAwMWS3er1fOFxyO8ZcdTAwMDHA4VNToVxm7CxcdTAwMWXCOoBiklXWM1PCXHJjwut9dkFy0kxfxtL87jTaUFx1MDAxN93n50FcdTAwMWSNOoRcdTAwMGKaucdtj1c6gi2vkItcdTAwMTdSTb/msCOAXHUwMDEzXHUwMDA3pTWTmkg5uZyZcVx1MDAxOVxmXGaCX89MhP1cbvVcdTAwMWO9XHUwMDA1xclcdIb9XHUwMDFiU5zrWs88x3dvcz2zUL7TdYyidYJRYnGInmSvXlx1MDAwNoy9uZ1i9bpWU53zRCv8XHUwMDFkcVB2XG42noz5TVLGIYE03Vitplx1MDAwNEFIhaFSXHUwMDA0gMVccpLUyVlbJ6B5dHjDqHyq64P723Q0hEUlXFzO6LpBOVx1MDAwNlx1MDAwN2ZxLTb9osNcdTAwMGVcdTAwMDFFXHUwMDFko1xyXHUwMDEwXHUwMDEwWk/UlDCugoHBXHUwMDFhikpQPEqqaVx1MDAxMCmXXWGpdVx1MDAxNZXMcd7bLCpR/ulcdTAwMTKNQVxyoVx1MDAxYZZYMFa8uk9cXMHxeVwi0oxmtLygd+Y+7Fxi5Vx1MDAwNqnIXHUwMDE2bkxU5FslxY0mXFyu3lx1MDAxOWq1snwjXHUwMDEwwGo92c+1kVR+8PCaaKpu7OAqcnVcdTAwMTg9vLy5Se+HcPJO+lx1MDAxN/5cbs2FXHUwMDE0yyRcdTAwMTOmX3TIISBcYnekslx1MDAxMFAgJlx1MDAxNzVbKVx1MDAxNVxiXHUwMDBl1jB9XHUwMDA3RFx1MDAwM2OwZS2lJWyu9HFds3dzvPc2a/OVf/c25ChcZq+MYIuXfyUhXH
UwMDE5Ob4pZ1x1MDAwZbLPqcFJtaYuY24y7Fx1MDAxOFx1MDAwNaQppZXtbjGhpbRDeVx1MDAxML03VtRS+CRsXHUwMDEzXHUwMDFjvVs1JskyfXZcdTAwMGJlOG6rU3NK6oem3i+EUEuhfvDDgG26j5H94oHa9GtcdTAwMGU5XHUwMDAyOFx1MDAxMVx1MDAwZVxuJkolmSqlZDAoWEd3KKKUYkJstT7/7yGl5vjurUop6p/skMY6xyVcdTAwMWFgl1x1MDAwN9nHXHUwMDE461aPKieF6v5Ftdvv5UNcdTAwMGZQyVx1MDAxZEOn1ZlYilx1MDAwMs3V6t03VlRShqProLBjSirdeTt/aSa6pdTJoJpcdTAwMTankTM2uFx1MDAwZaGSXHUwMDEyM1ao2KZhTCyzhnL6VYdcdTAwMWRcdTAwMDOK2JS3sZUtPlIqXHUwMDEwIKyjXHUwMDEyklNFrLlstVx1MDAxMnKzNLW2dc6z/fc2tZT0uMWJNqOccaPUXHUwMDEyfXbbhMXzlav0WfKy43aahy/JlFxi/Vx1MDAwNlpcdTAwMWPAXHUwMDAxM1x1MDAxZSt+XHUwMDEwlZFMXHUwMDFist1Ou+gjtF10vlvTUlx1MDAwZq88XHUwMDFmSTxlTM28qoebWPf27egqjFKK+XfapcxokGxxLTX9okOPXHUwMDAw7WghuVxyS6drqUBQsFx1MDAwNi0lMZKQYqu94Fx1MDAwNeeb7MWxLik1x3VvVUop//U0dlx1MDAxMZPQQFx1MDAxN0doSbZYsV2vZtK5y4eyvGtkn9/Cv9xcdTAwMTOog5dppnOUQFx1MDAxZFx1MDAxM0SB3ypiSlx0oSRTbLfSfS+DWHH/pa8rMfeqltf7kZI8ulx1MDAwYqGWkjPWVFx1MDAxMnzwiFx1MDAwMLV4PmH6VYdcdTAwMWVcdTAwMDLKQbgjXHUwMDA0zFBLTUqpQGCwjma7tllcdTAwMTDh6r8o4bcuJTXHeW9TSWniz1LCgEUoXVxcSSWzt3HzmrpM311ccnKvoC6iifBvRSyMcabvxTqs8IPx+aotlKFLyuxcdTAwMWFcdTAwMTi+WzT1VO5WX/l1Vsf711x1MDAwZjl1WHHb8adcdTAwMTCWoUvjv2uPIFx1MDAxNFx1MDAxNPAlXHUwMDE2V06/6pBjQFx1MDAxMulwQFxmwHsh+lx1MDAxOE2pYHBcdTAwMTB4XHUwMDFkOvonNJXtKilBtdz9KvQ5rnubVehcdTAwMTi7zOAobavQl+hcYj/oXHUwMDFjg5u5XHUwMDE3pVrq9un2gqfrqVj4lZRcdTAwMTRcdTAwMGVw21p6arZcdTAwMGZ/rK6kVmxcbm/bXHUwMDFla1xyOyalTkUkVyGlQlx1MDAwZurtxln8+CTJr09C2DtKat+lUtSgz15u49PpV1x1MDAxZHZcZijmcGIk5UKyieZRw3xfXHUwMDEwOFhHY3hOXHUwMDA0M8RsN+G3USm1rlx1MDAwZVJzvPc2O0jpXHUwMDE524ejZTI0zSWqJ55pXlx1MDAwYpbMkXRF9tNSiIp8iodcdTAwMWSiXHUwMDEymEOmtpBcdTAwMWFKKa1cdTAwMDKYlFqxeoJcdTAwMDJgXHUwMDFjXHUwMDFiRFx1MDAxOdNcdTAwMDZZ6rXazpLsY6ZcdTAwMTFcdTAwMDF2XHUwMDEwrVx1MDAwZXpXp3dh7CGluO+kXHUwMDE0XHUwMDA3wySVsLiQmn7RoUeAciRDXHUwMDA0vNdOjOf7VDAoWEfpXHUwMDA02L3PtFx1MDAwZWJn1F1hqXUl/OY47y01kVx1MDAwMv9Un6KUMWaWSPWxJ54t3p9mrkk1dX/4pJv8pVx1MDAxNfq9WcFcdTAwMTbYTltoT6kzvt9WkNxEnFx1MDAxOUhkjlx1MDAxOVu49clVWlxuXHUwMDAzPIj97oLuISNcdTAwMTlVniVFPz1kPl
x1MDAwZph41PY1esij7//h/XtZLPvvQaQ5Y0DpXHUwMDEyc8vVkmrLk448fLw5Ok88XHUwMDBm7nqJdPjX5SvjUCpsU1BjhNJipI8/XHUwMDEwLant26ox3pOeLlx1MDAwNSHAte0oJkQgzTNcdTAwMDLHNVx1MDAwZZj84HrK0ZvBNeX+5cd2XHUwMDBiXHUwMDE3RVx1MDAxOF9iPq4myEvpJnpD3Dt29HJcdTAwMTUjt+nzsCOb02HViOBKK07luJAkdraOaW6okFx1MDAwNLx73W1cdTAwMWbZOCZcdTAwMDZayjBcdTAwMDL766q2XHUwMDFmYH9cdTAwMWWwXHUwMDE5YDPlK42Nsr10+Fx1MDAxMtL4rGS6MpFzXHUwMDBib4lWunZcdTAwMTm5OM6ch37bdcGHXHUwMDEzfMKureF2fcFcdTAwMThh44PQXHUwMDFhdYig2ni3oNg+rKkmhNreJ6Fr3Wo5gXP908xxytFcdTAwMWLCtfSfnGRIXlx1MDAwNDXnXHUwMDEyO9pAJFx1MDAwNk+Z29LhQbZ4mrvP3MVSoVfVXFxxR2pBNFx1MDAxMVx1MDAwNu85XHUwMDFiK1x1MDAxZUBgXHUwMDEzSTk3YljRXHUwMDExqkjcXHUwMDAwulx1MDAxYsJoXHUwMDE4XHTb7jT/rVxu7Vx1MDAxZmC/f38lYIN/7ztjJFKCdzPNueXbR/y+8HjLbuP1bKp7ljx7LOFcdTAwMTNcYjmuUVU7VFxyu1NcdTAwMGVxPfotXHUwMDFmXG7bNpZcdTAwMTHCNthcblx1MDAxOa5ta2gwPJSBuJT6J3M27ejN4JpcdTAwMTP/npbIXHUwMDA0XG448MVcdDtS18WbwlupUTYvpW66Sp9cdTAwMWJng7BcdTAwMDNbcrypQFx1MDAxNd5cbsrt1PRcdTAwMDSwXHUwMDE5XHUwMDExXHUwMDAyODFcdTAwMDCMj49ru8CWRGBQXHUwMDE1ypS4XHUwMDA06ZlA+Vx1MDAwMfbnXHUwMDAxwVx1MDAwMtut1SqtzvTsXHUwMDE58a2SUlx1MDAwNFlba754kZSqmOPH0+L9yWv+XCJcdTAwMTWRXHUwMDBmrd51yW/+udBudjqRcq5bKG9cdTAwMWbeXHUwMDFjhfbUUkFqXHUwMDFjObtNp0uAXHUwMDAy/V4tu3A+liOT4Wv0KD6BrVx1MDAxY0qGXHUwMDFiKP0+goxcdTAwMWPPZz0vp8xcdTAwMTC5ln7rK8LbMO/+jCtWUMwyYulcdTAwMWZ4gsIwTCwxt/OSr9cj0SO3+li5uzxPRkX27Vbvglx1MDAxMaOPd4RcdTAwMWXfq3hoxlxmXHUwMDA1pZjdyG/bdmz1XHUwMDAxMd5lXHUwMDAzIbJjaWBcdTAwMTma+q5cdTAwMWQz/1xyLtBcdTAwMTNzXHUwMDA12iwuoFx1MDAwZU8z3VKsqF9cdTAwMDd1nS2nXHUwMDFh96R+5tfeP1R2LDGa4or7uGOhOFx1MDAwYrVDtlx1MDAxYuxoiq4vhPvYKFx1MDAwMsSj8NZU00b9e42gf1x1MDAxMmjEni3D55nxXHUwMDFi2c9cdTAwMTbJ/nU1cv5Ya+Xf8lx1MDAwZr0rv35AoTJjipJcdTAwMDH9hlx1MDAwMlx1MDAwMPRsdlx1MDAwMu6LLVMhXHUwMDFjXG6c2V2PjDbafzumVVxmmkrhXHUwMDEwu5aWSKNcdTAwMTn3JFx1MDAxYWfUtXF0xGjhsK11rLazp51cdTAwMTJkm9tcdTAwMWKx7Zbctos2uvfoNpp1j3f+UuVWc0tfLf5rjVu32fIrcPsy8vFqNv+zXHUwMDA3Udzm21x1MDAwZsV/ek1rXHUwMDAwo9TiKD18qt/UK+WT04db95pcdTAwMTZq6qXeNruAUnTTjmBCS2RcdTAwMWPFXHUwMDA0XHUwMDFmT8VzhZRgS2IkJ1xuI23/nF
x1MDAxZCtcdTAwMTmX8+/B1IhhU1x1MDAxNDyFXVx1MDAxY6P5XHUwMDE03pla7KZcdTAwMDQ6l22tkdhcdTAwMDJML2K3e20rvjdcbs/Js65cdTAwMTOW1DO1MlF0avBFOCxcdTAwMGXMKKukL4/jkUg1XHUwMDFlbyePS2dcdTAwMTe6kdtcdTAwMDVgUoxcdTAwMDKtdDOGXHUwMDEyRVx1MDAwMb6yJ9NcYluBelx1MDAwMYM1XGbF9Kpz3z7saahjjdx2n1x1MDAxMV+i0ZlLl7RcdTAwMDBiXHUwMDAy6eK1K7j8QMzeVbntXHUwMDE29zqFZnvDXHUwMDE0OntcdTAwMDTrxCtcdTAwMDP/rUc5+nIqloCrXHUwMDBiXHUwMDE3PP1aPCpF3U7mIH+TjZ4ov9nscMGV2dVKYFxi6lwiTej4enjEj4PyXHUwMDE1XHUwMDEwroZq28dmPYC18+rKhrtcdTAwMWZrRlx1MDAxNkGspohZXHJsW3vebFx1MDAwMbCtttvBUe1VXHUwMDFhe8X87cXVvzaL1lx1MDAxOadfJ1Slb66bKYH6XHUwMDE4mWRhpCZbXHUwMDE5QUnv7jJyVXS71excdTAwMTm0iuldQCpGXHUwMDBmzjC+l4BcdTAwMDEvI/xrjlx1MDAwNYA5XHUwMDFjXHUwMDAzUVx1MDAxNK2C41ErbVx1MDAxM+zfxYU4XHUwMDFjqV0z855NWYhZmc1cdTAwMWXIQIrKdlx1MDAwNaiN5maR6T3fOqFcYr4pXCJKpbJJOD66yfOwWI+LfidTf2IniadWtaEvL59cdTAwMGZcdTAwMGV3XHUwMDAyi0Y6UrJhQ05cdTAwMTSf8LXAXHUwMDEzgDiU4e2wW3ZcdTAwMDPnq9ZtT4dcIlDH6lxuyajQzGfP7mnTyuhcdTAwMWPsVMPWo9xvVWh/XHUwMDBii29uZ7Ng/HLCtVx1MDAxMqPvOnxF7NIoLka2N1x1MDAwZoyXmT5liVT7ilZad+xi0Dw3tYNdXHUwMDAwoySo9uxeXCJcdTAwMTJ/Sm/UPlx1MDAwNCNcdTAwMDa4SIuMMYJcdTAwMTGsUSRMxEhcdHDNJFx1MDAwNDBcdLwrYPy7XHUwMDEyI1x1MDAwM/+2NbZLNkNnvXiUetE7es41oJy7ub4+PCXJw8jdSX1cdTAwMTfASFx1MDAwMZmRo++xYtJcYqSmr2hUXHUwMDE0cUJtplx1MDAwNShcdTAwMTCq5djIXHUwMDAy0pOSO7Z5XHJgnCyGXHUwMDEz61x1MDAwYsFRcVx1MDAwMprKrfVcdTAwMDXYXHUwMDAyXHUwMDFjW80uXHUwMDBlqpKr7bntdrPd2bSg9D/9OvtcdTAwMDNQ7t/Axlx1MDAxNkVyucQ+qvX+bZy5+/tH8eRb7Ch538gl72BcdTAwMTewXG6cOMTgXHUwMDFmpqjRmpgx5uTSIciokjGtOFx1MDAwMnY9yVpNXHUwMDFjvN/DUihmd1x1MDAwN1x1MDAxY8XSn1AlXHUwMDBlRq1cdTAwMDLdXG5DhEpJmCch8I5dRqSxXHUwMDE1k0HMqlx1MDAwNDt7z1xiXlx1MDAxNOM/XHUwMDBiXHUwMDEyJ4/2ffT2XHUwMDE1mfLUR7/wXHUwMDBm79/Lwt9/bz5cdTAwMDRccnCliVi8XqdyXHUwMDE4u9vvvZQoZd1iOtt8NoPjh12AP955x1CBKoFQZSukx1x1MDAxMkpcXDhIoFx1MDAxYbTSdqNcdTAwMTmyXHUwMDFlqmbckdbfKi5cdTAwMDRGXHUwMDA3nE6ZQ1x1MDAxZI37o2M3eqyA9pFcclx1MDAxY+yGedPXP2D/ePk/6LGvr1x1MDAwNO1cdTAwMTmdf7hcdTAwMDCkXGK1xJKHp2Y+X+BcdTAwMDftXGZ9yZjq/VtcIle8bu9cdTAwMDK0XHUwMDA1gKNcdTAwMThHbWnR7cnZ/Vx1MD
AwNjY4iH1cdTAwMGIgW2LkjTaDXHUwMDA0trBcdTAwMWTFkZDRr1x1MDAxM6WImNI/kjhcZp06J8PaMibQJjypsvc5XHUwMDFl28GIhnB5XHUwMDEzXCJdYGz0rVx1MDAwZV5/c6T7PvnhtyefeUDQp/7YR5eMXHUwMDFhXHUwMDFjQ/vF10RcdTAwMWP1sm/7+41odb/55rpHUZa6rPitY1xmXHUwMDE19iVQR0olqb3BwrurxW/wc8dOslwi5aP5qnU1+1x1MDAxN7ZjXHUwMDExMFxmx6l1NFPkN3FsZyAh8JlIbYSmXHUwMDEzIT3VtoCDsdCtkEDoK+NcdTAwMWTwXHUwMDBm9D9evlx1MDAwZt6+XCKTz3xJ6Ptvv+OLfEqGbSPo4onwrEi/XHUwMDFk5HuHplx1MDAwNdXk21FcInbejrJdQD63y0UoXHUwMDEzgFJcdTAwMDRlzFhHTlDgMECRZV2vVlx1MDAwMOuJ57lyXHUwMDEwtoZPbEg3M1x1MDAwZi6MwtFcdTAwMDdcdTAwMDH1XUm8XHLzXZtNto2dcq3TxNy3XHUwMDE4XHUwMDEy7YJLlNmLJ8Ozrccj2elcdTAwMWZcdTAwMGbi2f2Xw4PXk9515WhcdTAwMTdcdTAwMDApNHc4hjuEM0tcdTAwMWFcdTAwMTOA5I5gXHUwMDFhhbdmXHUwMDE4XGKtuvg4ODhyXHUwMDE1VC/OXHUwMDFmMIZcdTAwMDGMzFx1MDAxN4vGSG4482TD51x1MDAxNjrG6dm9rNSvslwiccq7hYKKseQuYFx1MDAxMUA5XGI1NG3EouKeOZ5hXbJhXHUwMDBlk4pyu6tcdTAwMDJcdTAwMTcwY1nPSvNSdm9cdTAwMTMwXHUwMDEzW3CNwDiR6kLg4ldcdTAwMDJpXHUwMDAzsF00erzfXHUwMDFjNFKyWSh6z7dcdTAwMThcdTAwMGX5t3A4Y9JJ2a54ii+xcodcdTAwMWPdllx1MDAxZe5FYvBaVM3yVe8527/q7Vx1MDAwNFx1MDAxMFx1MDAxNXHsXHUwMDFlxkBcYlx1MDAxN1xiXHUwMDAzXHUwMDE4a1x1MDAxY2+Ig6JcdTAwMGYjVKJt5L6mguPlgUhcdTAwMTVcYqTr7XWNx7hcdDVcdTAwMTVXK2/B9YNET+P9yfXaWlx1MDAwMCxRxJhcdTAwMTk8prPnnWSVPqeSL6r1UILb3ShiXHUwMDA042i8WsJcdTAwMDRcdTAwMDFJxjZcdTAwMTlCQlx1MDAwNKmlXHUwMDEwaHbEyDUt1FlcdTAwMWWGqFx1MDAxMolNXHUwMDFj6p3Xij8wnNFcdTAwMDFaSY2ahanFVWLs9DqejMj284Uq8f5bp5DpXHUwMDE1dqL/XHUwMDA3J9QhlFx1MDAxMo1wo3a2dVx1MDAwMoj4odFcZilTcLpcdTAwMWUgMkS7XaNcYvqjkcJClMhB40NcbkH94jI7p66GRbZhLLJccmGR+aZQNTWKg2CLi8T7+8OUhNQtacpqLX+UfmxHobxcdTAwMTNQlLaTiKaCSMVtf9exrVx1MDAxNlx1MDAxMItE4d0wctjeeU0p1G+C0XZM4oJcdTAwMDQxVfpcdTAwMDPGd9vZXHUwMDE2XHUwMDE4fYnRzqtcbuatkplbnthNpctCV1x1MDAwYiyhc0lDXHUwMDFmb85cdTAwMTK7seDNaFSCVNmNqcWESmSOkswoMExphqIoREi0a1x1MDAxMpHQzVx1MDAwZivuPFx1MDAxMMWMvWhccpqe0UvkTk9O+k3onzz0abH+9HLjpnqt58wuINGWXHUwMDEzXHSpgaJcdTAwMWNE5tNj+1x1MDAxOVx1MDAxOHBcdTAwMTCEgMpMounxUEWoVOKAgJitl/T/YHFVLErfpaegpN2JXHUwMDE1XHUwMDE2n+Mvv0Wfze3pc6zZyb3w7EP2IML8mlx1MDAxOYdcdTAwMGKKitgtg7
hQxs6XjzeMRFrkiqDR21xiXHUwMDE2jFxizTRcdTAwMDayoU1fgtzePFx1MDAwNrf1KN6VXHUwMDFmP2mb71x1MDAwMVx1MDAxMfxcdTAwMWJcdTAwMWTZpSTobtni6dN07JZmXHUwMDEyiefTg1x1MDAxYlFN11x1MDAxZUS7w3ZcIn0qXHQ4WnG7vszOXHUwMDE5svFcdTAwMDCVOlx1MDAxMvlQXHUwMDFhbjihPDzTXHUwMDE4Wtk+joL8XHUwMDE3Rad/VyD6t0+mtjbZXHUwMDAycfHo9PX5sNeo7KfPb+OnXHUwMDE3KZ54bWZq1Z1cdTAwMDBcIkhHc7RoRFx1MDAwMVx1MDAwM287oFx1MDAwZiBcdTAwMGWrX4S0fVx1MDAxMqRYT8Xrd5DI7Vx1MDAxM6L/VVx0m51B4lx1MDAxZu8lrr9yrdZtXHUwMDE379mvjypifEiV4vuFj87966Xivu5PNVx1MDAxOPuyO9hcdTAwMGbRbXHkXHUwMDBlS5L/+uOv/1x1MDAwN0YrJq8ifQ== + + + + + ATATGCGTCGATGTGTGACGreference genomeNGS readoriginal Phred scorepresent in dbSNP?noyesnopotential errors?errorerror10101020202020101010 diff --git a/docs/usage/variantcalling/img/clinvar_results.png b/docs/usage/variantcalling/img/clinvar_results.png new file mode 100644 index 0000000000..5e88f3d961 Binary files /dev/null and b/docs/usage/variantcalling/img/clinvar_results.png differ diff --git a/docs/usage/variantcalling/img/clinvar_search.png b/docs/usage/variantcalling/img/clinvar_search.png new file mode 100644 index 0000000000..df8c58d84d Binary files /dev/null and b/docs/usage/variantcalling/img/clinvar_search.png differ diff --git a/docs/usage/variantcalling/img/gnomAD_COL6A1_v2.1.png b/docs/usage/variantcalling/img/gnomAD_COL6A1_v2.1.png new file mode 100644 index 0000000000..2fbc497fe0 Binary files /dev/null and b/docs/usage/variantcalling/img/gnomAD_COL6A1_v2.1.png differ diff --git a/docs/usage/variantcalling/img/gnomAD_COL6A1_v4.0.png b/docs/usage/variantcalling/img/gnomAD_COL6A1_v4.0.png new file mode 100644 index 0000000000..ee148dba85 Binary files /dev/null and b/docs/usage/variantcalling/img/gnomAD_COL6A1_v4.0.png differ diff --git a/docs/usage/variantcalling/img/gnomAD_constraint.png b/docs/usage/variantcalling/img/gnomAD_constraint.png new file mode 100644 index 0000000000..9183272073 Binary files /dev/null and b/docs/usage/variantcalling/img/gnomAD_constraint.png differ diff --git 
a/docs/usage/variantcalling/img/gnomad_search.png b/docs/usage/variantcalling/img/gnomad_search.png new file mode 100644 index 0000000000..c5577353cb Binary files /dev/null and b/docs/usage/variantcalling/img/gnomad_search.png differ diff --git a/docs/usage/variantcalling/img/gnomad_var_present.png b/docs/usage/variantcalling/img/gnomad_var_present.png new file mode 100644 index 0000000000..5e42034bc2 Binary files /dev/null and b/docs/usage/variantcalling/img/gnomad_var_present.png differ diff --git a/docs/usage/variantcalling/img/interpretation.excalidraw.svg b/docs/usage/variantcalling/img/interpretation.excalidraw.svg new file mode 100644 index 0000000000..5ef108ab30 --- /dev/null +++ b/docs/usage/variantcalling/img/interpretation.excalidraw.svg @@ -0,0 +1,17 @@ + + + eyJ2ZXJzaW9uIjoiMSIsImVuY29kaW5nIjoiYnN0cmluZyIsImNvbXByZXNzZWQiOnRydWUsImVuY29kZWQiOiJ4nN1aaVPbWlx1MDAxMv3Or6A8X2aqgnL3JVVTU6xcdIHHmlxmTGZeTVx0SdiKtVx1MDAxOEnGNq/y36evXHUwMDAwS7LlXHLsh1x1MDAxOZNcIuZqa/Xtc0533/vHxuZmI1x1MDAxYnS8xqfNhtd37MB3XHUwMDEzu9f4YMbvvST141xiXHUwMDBlkfzvNO4mTn5mK8s66aePXHUwMDFmQztpe1knsFx1MDAxZM+699OuXHUwMDFkpFnX9WPLicOPfuaF6T/M71x1MDAxMzv0/t6JQzdLrOIhW57rZ3Hy+Cwv8EIvylK4+7/h783NP/LfJetcdTAwMTLPyeyoXHUwMDE5ePlcdTAwMDX5ocJArNDo6ElcdTAwMWPlxlxupFx1MDAwNFx1MDAxN1LT4Vx0frpcdTAwMDePyzxcdTAwMTeO3oLJXnHEXGY1jlx1MDAwZf91luz0kv/2PH3tUvfyN+Q9XHUwMDE0T731g+AyXHUwMDFiXHUwMDA0j56wnVY3KdmUZknc9q58N2tcdTAwMTm7RsaH16UxOKG4Kom7zVbkpWnlmrhjO342MGOoeL9HJ3zaLEb6Zoo0sbDmXHUwMDE0XHUwMDEzPlx1MDAxY8+v5NjSXGYzJEpcdTAwMDdcdTAwMWWN2Y1cdTAwMDNwP1x1MDAxOPNcdTAwMTfsmZ/CnFx1MDAxYttpN8GmyC3OuSG35OamOKf3/IpCXGbHWp7fbGXG6ax4lpc7mlxuglx1MDAxNMK8mFx1MDAwNvOAzqGbT/nvhXdcdTAwMTNcYpZDc0XUXHKCsoNcIvfJQc+hUVx1MDAwNFx1MDAwN31cdTAwMWH5VbyBOX9/NKjKgVVcdK7M62fDVyuFwlbfXHJ6Z+xob3dv15e/nfZuz+95Y3jer1x1MDAwZvW3fbyYOL6jzpG/XHUwMDE3d/Y0uf56cHnmXFxWn/L8fDtJ4l7pvk/fXG63dDuu/Vx1MDAxOLFYaIU4ZVx1MDAxOKuS51x1MDAwMz9qj/osiJ12XHUwMDEx5Fx1MDAxYiWDx8BVef8yrjCZhCvMJMNCcaHnXHUwMDA2Vr0311x1MDAxY1iAKEsxTjm4nVx1MDAxMCbwXGK81FLglSV2lHbsXHUwMD
A0wrVcdTAwMDZihFlCa1xy0645R1LhccRxNIY4Rlx1MDAxNNdcdTAwMTi/XHUwMDFlcJVcdTAwMDNjyJpcdTAwMTakSGKlXHUwMDE3XGLSwqo4yi79XHUwMDA3M1x1MDAxZFx1MDAwNFVGXHUwMDBm7NBcdTAwMGZcdTAwMDaV6cyj16AoiuLMziBI083/RH/tJKAwjvnzb43KuduB3zTR3XDgXHUwMDFkvKRcdTAwMTL4mVx1MDAwZvo0PCH0XbesOFx1MDAwZVx1MDAxOGD7kZdcdTAwMWPOo1x1MDAxNHHiN/3IXHUwMDBlvtXYV29cdTAwMWR4yPvyPKXYKsXUjZ165iiMMzpcdTAwMTXL04WSs9HRIaA5XHUwMDAxoeSSXHUwMDE0jp1cdTAwMDXom+CAYPefbvv4XHUwMDAxb1xycLh1cXSarjuguUCApjqlJFx1MDAxNlxi1Mp0spSBXHUwMDE0OjmGWi4wlVKxXHUwMDAyNe9BJ1W37Udp2Fx1MDAwZn+GO4PzXHUwMDEzR3Wi2/a8Oun8uOtep/2tXGLT+9Bv0pvBXHLeW55OgkrxMsBWo5NiXCKsXHUwMDA0U1pjLeaHVb0311x1MDAxZVbCkpJT+CdcdTAwMDTBSI+ASy5cdTAwMDFcXNNVUiFLUa0kI0JyXHUwMDEwSTKOt1x1MDAxYZWUmlx1MDAxMIS4fDuZJEwrXrJ21TLpwFx1MDAwM4zKbdqRXHUwMDBi9zPngVxc+tFtnIS5Oq1ILWeoxaha1plZa+QyRNP17TCO3DpsQzRPxraGmFZcZs2fXHUwMDAzXHUwMDFmdE/Z2UV4tfvj58mXlnPY/jrwWuuObUalxavoNVx1MDAxN1x1MDAxMkktOPQ6vby9dbSja+DMa7JcXMiER1x1MDAwMYxcdFxiJsKsRPJLVEyyKsWMtlx1MDAwZlx1MDAwNntxb2sv3j/a3kG95LzdPvpcdTAwMTMqy1Up8dT7ft6/uj74ektCLPA35l9/Qd962fJcdTAwMTRcdTAwMWWoXHUwMDFlldlgXHUwMDE1XG6vXHUwMDE5n8RcdTAwMDJMU0qRWoBcdTAwMDTqJ3/dSVx1MDAwMPRVYClcdTAwMTjUXHRUSFxcXHUwMDE1eIr5q7lgqrxcdTAwMGJuXHUwMDExzeGHSVOnlKhoyFx1MDAwZWxM3bHWkknO2Vx1MDAxMsjhxfJcdTAwMGVFXHUwMDE1XHUwMDE07a+U91KCOUPeIXpApkHR7+3Et415q5HzXHUwMDE5SjYq509mjVx1MDAxYjWXfFM2XHUwMDE1tVNrXsomJ+dcdTAwMWNLLYiUxfTMwi77XHUwMDEyXHUwMDFlf/4s7TtCd2l6K85cdTAwMGW+65O1xy7RXHUwMDE2J7RcdTAwMDJQcyXjgFrjXHUwMDAy9WpcdTAwMTX3XHUwMDFjXafigllyXHUwMDAyXFzHi1+iYa5cYsfvq/jldvfsolx1MDAxZm3TXHUwMDA33dxcdTAwMWVcdTAwMWO3YiRJ/CdI49T7RvvZT3XXvsvwQf9kcNw7Pj3rni1NcjlWbOVFNeFqXCJulVBcdTAwMTRJvMCqTv0srTtuubKYXHUwMDE0XHUwMDE0UTiXK1x1MDAxML8qfFx1MDAwNVpcbnyn19VIWPBkpaFUVorMWVczjaXUTL1p+3nRMK1cdTAwMTPe+evqTtzpXHUwMDA2eW1cbuJcdTAwMGKm2Ea/VqW+M2RoVH1LttVYtoxcbtpcdTAwMGJcdTAwMDK/k9ZcdTAwMGKwmLg6XHUwMDBiJSSAWJD5cXy8//3k4T7TXHUwMDBmN1df0v1cdTAwMDN9mvm8v/Y4hlx1MDAwMlx1MDAxYVx1MDAxOIvrUVx1MDAwMeZSW6jSil5qz5lcdTAwMTV5zVx1MDAxMKuqpoRmXHUwMDE4I67xSlx1MDAxNmdXVkL/3P1x/n27f3ef9bOrXHUwMDBiqdFB2LtYXHUwMD
BmfYRcboks0tB7kT4yOVx1MDAxMVZYK1x1MDAwMeTH+fxN53pvrjuuIL1UXHUwMDA0XHUwMDEzXHUwMDEwXHUwMDFirKHIqoJLIGVRSiD/xFx1MDAxYZVXXHUwMDAxli2Rmlggdlx1MDAxYTKKSaVpXHUwMDE54E+gU1x1MDAxMlx0glx1MDAxOXpThSRMS7xAoL5OIUHDnKCbrq7FPENcdTAwMWPGWsw19sylhVhNRewjY9Qp4WTIglx1MDAxMkKOxVx1MDAxN8hop3dcdTAwMDHXXHUwMDE0sVRSS2KszVJcdTAwMTGnVKpqRkuwslx1MDAwNFF6pfspuLYk4dzs5mBmrapmXHUwMDAzXHUwMDEzxsJcdTAwMDJcdTAwMGVXeFx1MDAxY7iQjFOslHwrsZyx30K/KOFNMzvJdvzI9aNm1bCnnXnzbHjIXHTA6Vx1MDAxYStcdTAwMTFkNVx1MDAwNHxLqMZcbtRQSVE6q2l3zLtapm5cdTAwMTl7YS9yZ1x1MDAxYjK9+VQyZFx1MDAwYllcdTAwMTQpppBGYFx1MDAwNCGKXHUwMDE1NVTJXHUwMDEyXHSGQrFCmaBcdTAwMWNLwsasXG7sNNuNw9DPwONnsVx1MDAxZmWjns1duG2Q3/LssWmHtypcdTAwMWaDXHUwMDAw9UdcdTAwMWFWXHUwMDFkc9NqOlR82yxglP8x/P77h9qzXHUwMDAxZFx1MDAwMlx0qSR+jHBcIsuXY0StvJykQ/9PvNNEsOQ3KmBS3Gej/P/CXHUwMDE0iScvt1x0jDCCkJo/qZm+oLGmXHUwMDE0Kbi01OjuXHUwMDE0yFx1MDAxNCxVrVx1MDAxZpbeYVx1MDAwN+qtbmQrrbtRi1x1MDAwMTfLMTaExFxualx1MDAwN4HfavVtSt9dKKDpgqWXz4bTXHUwMDE3tKskhIjxXHUwMDFmY0AzXGYpROroUDG6cj5cdTAwMDQ6XHUwMDA0jcBcdTAwMDJcdTAwMDHXUSU0UrzGXHUwMDEygVx1MDAxOWTUoHNMcqG5eO90uMUh6WCq3P82XHUwMDFm09qSWM0mwa0xbOSXXHUwMDBmUbEs8mNi8lKFXHUwMDE2Zk/LXCJcdTAwMWLZp/eW15T9OCZcdTAwMTaHmk0yijjXvFCDfJlRQ0lcdTAwMDfyZVJIiM6SM5bNhsjSXGZRzlx1MDAwMFxuXG7yPVba51xctD25XHUwMDA1eSAhoI9CVcrP51wiT1xiKphWciUrXHUwMDFhr0xcdTAwMTZf3Fx1MDAxZJ2TXHUwMDFlXHUwMDE3ydFARVx1MDAwMF+YsLxJpnU5X3liJfwyZpzeKFx1MDAxZLVcdTAwMDJhZfZ5c4aA9TRcdTAwMTk3wnpcXEV794Q4ObzNZyywl0ZwbGJcdTAwMDEsXHUwMDE5Z0RcdDR/dje9h7eu/Ia0ZTrekOdcIlx1MDAwMZRe2lx1MDAxOZA3hLG2INVGJtND8Fx1MDAxOTVsqVx1MDAwNMdcdTAwMTVcdTAwMThcdTAwMDBTLDTYUbffiitcdTAwMGJTU0hcdTAwMTCoeplcdTAwMTbFgtwzwVGFXHUwMDExRFx1MDAxMVvC/sl3R3BzU4thXHUwMDE2yFx1MDAxZIiiwmxcdTAwMTeEL+M1KLVUuY5akOWmN75GWI5cdTAwMWH3cKVN6lwiXHUwMDA1raFay0QmsDBAXHUwMDFmKi6I0/9cdTAwMDe+m1x1MDAxMO3mM1x1MDAxNueT+G7j6Vx0XHK707nMIN6Gc1x1MDAwMnHvu09cdTAwMWTR4k1cdTAwMWL3vtfbqd0wYT5mVSH3qSErL1x1MDAwN8GvjV//XHUwMDAzICVcdTAwMWZcdTAwMDUifQ== + + + + + annotations (prediction)clinical and family informationfilter variantspopulation 
databasesconclusion diff --git a/docs/usage/variantcalling/img/overview.excalidraw.svg b/docs/usage/variantcalling/img/overview.excalidraw.svg new file mode 100644 index 0000000000..0456d4356a --- /dev/null +++ b/docs/usage/variantcalling/img/overview.excalidraw.svg @@ -0,0 +1,17 @@ + + + eyJ2ZXJzaW9uIjoiMSIsImVuY29kaW5nIjoiYnN0cmluZyIsImNvbXByZXNzZWQiOnRydWUsImVuY29kZWQiOiJ4nOVdaVNcdTAwMWLLzv6eX0Hlfj2e093qRX2q3norbFx0W9jD8p5blLFccjhcdTAwMTjbsc166/z3VzJcdFx1MDAxZc/mMXhguHGqXGJ4mZFnpOeR1JL6P1x1MDAxZubmPlx1MDAwZe67jY9/zX1s3NWqrWa9V739+Fx1MDAwNz9/0+j1m502vaSGf/c7173a8J1cdTAwMTeDQbf/159/XlV7l41Bt1WtNYKbZv+62upcdTAwMGau681OUOtcXP3ZXHUwMDFjNK76/8s/v1avXHUwMDFh/9PtXFzVXHUwMDA3vWB0kkqj3lx1MDAxY3R6j+dqtFx1MDAxYVeN9qBPR/8/+ntu7j/DnyHpeo3aoNo+bzWGXHUwMDFmXHUwMDE4vjRcdTAwMTJQXHUwMDFiXHUwMDFkffZrpz1cdTAwMTTWXCJI0Eaqpzc0+4t0ukGjTq+ekciN0Sv81MeN7auDs51tU1m6XHUwMDFk1E53V9ZWXHUwMDE2tnF01rNmq7U7uG89Xolq7eK6XHUwMDE3kqk/6HUuXHUwMDFiXHUwMDA3zfrggl6XkeefPtfv0EVcdTAwMTh9qte5Pr9oN/r9sc90utVac3BPz1x1MDAxOfH05OM1+Gtu9MxcdTAwMWT9hVx1MDAxMEgjpdPSXHUwMDAxXGKp7dOr/HGJJpBeaWs8XHUwMDAwWFx0Klwi10KnRXeC5PqXbPC/kWSn1drlOYnXro/ec6rO1Onp6D23P7+t8y5w3nrnwIFxzj+946LRPL9cdTAwMTiwJEJcdTAwMDXSamGc9UaQLCFJXHUwMDFhwzuiNWhplManXHUwMDE3+PTdlfpQN/49ulxyPdKqXHUwMDE1/kT7utVcbl/Jdv3nlfylQyMtgp/P/DP6fvz+pZD2jc5w3a1XXHUwMDFmtUTSd7J8XHUwMDAxvXUjuVrN9mX09K1O7XKkWFx1MDAxZkLniil0o9VqdvuJ6myVT1NnXHRcdTAwMDJcdTAwMWQqqTC3Plx1MDAxZqxf43a/uuVcdTAwMWYuv9ZOXHUwMDFmvq2sXHUwMDFmnJ2+rT5LMUmhK85cdTAwMDbeI1gvXHUwMDEwQTv0Y1x1MDAxYa2EXHUwMDBlvEJlXHUwMDE1SmSFNy/R6EGv2u53qz3SgbhWe1x1MDAxNVdjZ6NqK1xyemckWl2E3qrJevv0mdGnnzRg8WLlwrcu785F233ePer1jzfbK0/fdEwrq71e5/bj0yv//JF83F9vXHUwMDFmNO5cdTAwMDbjXHUwMDA3ejzj9reFebF6t7N+uHvy7X710+rG9t566LA/f8swNlx0Xmo1K2NcdTAwMWKTM0xcdTAwMWPCpFqa0Sg1oNK5LS35W5fd0oxcdTAwMGJQSGWt08JcdGXkuKVcdTAwMDFcdTAwMDTCoFx1MDAxMkZcdTAwMDJcdTAwMTDu2OIszUBgjPSOrrox2qIzcctTXCJqecZcYm1Qq1x1MDAxOVx1MDAxON7YXHUwMDBi0zDD1Mo6kqrTXHUwMDFl7DZcdTAwMWaGXG5nx55drl41W/djt3WoxXRcdTAwMTWXP+3ubX9cdTAwMWN7+lOrec5cbv2xRuI2emO6PmiSz/X0hqtmvV
x1MDAxZfaianSuarPd6K3kYYtOr3nebFdbe8mi0DdvfHlcIvtAmZAy9Fx1MDAxYvwqP68zbfVcdTAwMTGCXHUwMDEyjFVag9Gnn6zVOmGlMfndvGxQLI+xXHUwMDA2QmipJThF3lx1MDAxZIJ3OLpcYndDnFxuPDlcZlx1MDAwNrRcdTAwMTVagVx1MDAxNeO+n9Iy0FJKtnChvbJRYWdnv+xlOiGcXCLnT2tMst5AXHUwMDExqVx1MDAwYim8I5dd+5AsP2lcdTAwMTSUXHUwMDA3klx1MDAxMUZf4nVptFx1MDAxMCPvXHUwMDBmqr3BfLNdb7bPx1x1MDAwNftcdTAwMTn35DG9ISzUrvuPOsH6QIjtXGZcdTAwMTAuS1x1MDAxYzmMfMWqXZY69mVcdTAwMWLt+mQhzre93qrdbVxmmqubptu/XHUwMDFjrC3fXHUwMDFjJFx0UVx1MDAxMVx1MDAwMalcdTAwMTS5ouTHS3Rkfjomg1x1MDAwZYT33tKdRkJ28tJiMrWq/cFC5+qqOaBrvdVptlx1MDAwN9FrOrx4n1x1MDAxOFx1MDAxMy5cdTAwMWHV2Fxyp+9cdTAwMTR+jVx1MDAxNLRcdTAwMTmJXHUwMDEyu3zQcVx1MDAxN2n029yIXGaHfzz9/u8/XHUwMDEy360hsFx1MDAxNkm3XHUwMDA1kvfp5Iii+FEh/ab4XHUwMDA1iJCIvZD910lcdTAwMDdMs5lfh1x1MDAxYjeX0dE+hP9PQ9LMmJngJFxyTLVywiFMXHUwMDAxptlaU1x1MDAxZTBcdTAwMWRHTmFcdTAwMDJcdTAwMDZMYbwkblEy4vlQjIHWW1xuQWzBMYaUXHUwMDEwaNTgjHh8QFx1MDAxYzutXGbI1o1ccnHqT9BUjrBAoClcdTAwMDQzc4TMySHCi0OPssRcYlKlx1xiilxcVVwiZVx1MDAxMLktZe3Loentf/5xeN//MVx1MDAxOLQq7Vx1MDAxZue4VHpLUTKwxDcgki1FqcAy3JF6XHUwMDE2bCloXHUwMDAz47xXxoAjXFxcdTAwMDSMXHUwMDFiStxAJFx1MDAwMzXnpNTLLSQ7RsgyXHUwMDA1ODhecL7XbZ1uu36VTKK7eLUxXHUwMDBiU0AvLblMWtkpTCEpXHUwMDAyXHRcdTAwMDVXXHUwMDEzXCKQKkdcdTAwMTNXY3doLFxuaTXOXHUwMDA2XHUwMDE5Mcig001cdTAwMGJAxkSORlx1MDAxYlx0p81cdTAwMTdx4PN5UqYn46zlbKXLT5M33/bFXHUwMDAxdExj5XJxvrkxuPt64lbLbvxakY/yyE5cdTAwMTF+NFx1MDAwMYTBceasaCEgPyR8iicjN1x1MDAxOFx1MDAxMOrYJDqUXG6dUs7b0dcoXHUwMDAzXHUwMDFmTkqZrSi9ty9cdTAwMWa2/EpF717cXHUwMDFl3H7uV7YnZ+JcdTAwMWU/vPult7naOr5cXO7Didvaai4sXGaWTvNhS+Zxj2pcdTAwMTdHS/M/vl6uwVatc3C7r+Fh633Rt0lnb1xuTsjj0jp/Lj35JpXdgIFCNeNcZjFcdTAwMDVcdTAwMWFlIJJKJ273XG4oninUllxyp1x1MDAxMT1aoZTXXiUk1Vx1MDAxMyxZkpJoMvJcdTAwMTksXHUwMDA2vW1uLz+z8prqXFz9uttcIsJcdTAwMWM0+lx1MDAwNWX5Jlx1MDAxMFGUd1OFKpx9XHUwMDAxUtlXXHUwMDE5XHUwMDE0noPx3NZ7u1OheGTv4ka1ayfbS6vLe2eb52W3XutEoFx1MDAxYyTQL7OjKJaBfcJcdTAwMWGujaXgSVx1MDAwNKPRuFnY6SsyrrxYfzg/7199Xb9dhlx1MDAxYjt/ZvZcdTAwMWJcdTAwMGJvzbhcdTAwMGa769uiv/K1tXF08anVW9Zf1M6X98W4XG4zKFdcdTAwMGJcdTAwMTDO2fxcdTAwMD
Fz8l0qvdFiXHUwMDAwXHUwMDFlXGZcdTAwMDXLWlx1MDAxODLfiOli8aaLKnBEuJruOnptXHUwMDEyMkombshcdTAwMTRaW8NcdTAwMDVcdTAwMWG/XHUwMDBm4TJRzf24ptBycD/Xr3V6jb/bREn092mvOmCNLoaCJ5BRlILjYs6lSJmLkzVkmnbqXHUwMDFhnLfRJ5/oWFx1MDAxMFx1MDAwM2jpQrVcdTAwMTGTTDtcdTAwMWJDS2raxssgxsUwdp1nbcvOXHUwMDA2Llx1MDAxMuuOKqpcdTAwMDLhw+L8ylx0XHUwMDBiK8FcdTAwMTgspo6q5Fx1MDAwYmnZ3u3c2Fx1MDAxYZaw2lthhNHGXHUwMDFhlFx0XHUwMDBiafAr+1x1MDAxMfvSuVx1MDAxNtSy7XwuvKonXHUwMDAxXHUwMDExXHUwMDA1XHUwMDAyiaKk8aPE0pMsKpBOYoIo72xcdTAwMWQtrtH8qPxU5tGnP4T/f1ZcdTAwMDBhReoyXHUwMDE3UHxp1DRcdTAwMDHEXHUwMDA2rPZcdTAwMGX697tuvnYvVL+qXHUwMDBmLndvylx1MDAwZVik4FxcV4NKaG1t2Fx1MDAxM+DPa2lcdTAwMDOkIFtq57n4JpREnrlL4riYIS2h51x1MDAwM7pcdTAwMTlcdTAwMWPmaD98RFx1MDAxMc1Icme0n0VcdTAwMGX/XHUwMDE1Y4zdu9sunlxc3pyuL65ebPWWdr6f711P5bMrIUFcdTAwMTWfJVx1MDAxM+klp8RcIp6EmYLXXHUwMDEzv3TZzWSoncahtihcdTAwMDTp4biZXHUwMDAwXHUwMDA06El5Lb2vYDNRnssmXHUwMDEwKVRcdTAwMDLFUULcWuK5Mu80enSzSHo/23OfWlVf5rnfVJmkXHUwMDA2c/1mcYmyXHSIXHUwMDFm9dJTRCo8TUa4mGa+XHUwMDFlUFJcdTAwMTBcbvlXqa6Pd3B9vrl3Klfu9n7ctrvr/bvdspuvtprXoI1xQFas7fhcbrWWJkCniVx1MDAwMnm9lOtL34blXHUwMDEycmdIXHUwMDFlnUZcdTAwMDXvi9e271fvT1x1MDAxNo7Xtuzm9p71+0du7fJkSl4z4XLNolxuvGU6r5EvZDyoKYgt+VuX3TI8UtRcdTAwMDCGq/WA2S3CbEq8mmlwylx1MDAxYbxcdTAwMTfcpVx1MDAwNHQyXHUwMDFkt5CEpJThfiAri6/emKW2vozazlx1MDAxYu1cdTAwMGWr9tzf7Wq/n11r8VKKm1x1MDAwMPdRintcdTAwMTItSbBZ5J4yic7Y9NwyXHUwMDA18c5oUFPklmVje+9k9dvJ2snlevO4W+leK/O29jyx1U9rXGasXHUwMDE0UqL1moKlUMfR0JyFXGbI2K1cdTAwMDftwaBcYvVFPKfX7+ys5ms+bsqAKuClZJGYlEJcdTAwMTdcdTAwMDCYpLyUJK/WXHUwMDFiLngtXHUwMDE13T2qw9ZcdTAwMGbZXFzdWKpcdTAwMWXuLq9cdTAwMGZqg96+/VpZXHUwMDFkZ79nrel8v19u7d8uXezu7KjDjeZgs+PWXHUwMDFh+Y6bg0elQ6/DXHUwMDE2WFx1MDAwNI+CTG+9IJ9cdTAwMGKtkpjfw7w0clx1MDAxYlcr3f7y9501udjtdWuHpc+jXHUwMDE4p1x1MDAwM6eM9GglgFx1MDAxZK+iXHUwMDAwXqW1XHUwMDFhjZKevG1cdTAwMGIvsbpsXHUwMDEyXHUwMDA1XlpcIlx1MDAxMlXja8JZsSGC4DLVt1wi0DcoUPxcdTAwMTWIXHUwMDExXHUwMDBmtjg/m0ydXHUwMDA1lSmmnrzwONCFsntcdTAwMTEzNUp4y7mc3Fb6qVGRjfXmfm2+3ts52jtdqHz2vuxWimSk1pKdkksrXHUwMDA14ng3XHUwMDE0PUfMRFx1MDAxNKq0XHUwMDE32vlcdT
AwMDLNNF/phFx1MDAwMaUk+d7vq1bxeKu1PG+2llx1MDAxYd2jvY1e+/Pt0Ylq5mVDsyU7u/2V9Vx1MDAxZrvfz/BIf9k7a+3MonJid/37LWyq46+Hhy2/XHUwMDA2gyNxrN1MKycgbMhFsKxLb0dcdTAwMDZcdTAwMDGSXHUwMDFirfKbb/JNKrn5elx1MDAxOHaVSaFcdTAwMWM5r/QzYr72tcyX/VvHxDnsR3RcdDnYeKRKQaJCVEa8eS/yNJo6XHUwMDFiov1VmvB3O7M4YaaR61x1MDAwNIJKo+VXq6LIjmTT+1xuXHUwMDE0t1PKMClMsvUlt99uVNFXXHUwMDBmL840foFtY1ePy27rXHUwMDEyeFxcXGbZXHUwMDBi926DXHUwMDAxOV5UYaxcctAgaJSKokkostHAXHUwMDA1Q38hceSAMT9cdTAwMTfdY7Es3SdtjHlnjVx1MDAwNqtf72/aW/VcdTAwMDdrXHUwMDFl7i76cn/h8vjS5SXZs4f929XK2mXnfFx1MDAxN3ZcdTAwMGZq3Yu6XazPgLzru1f+fHdv6fvgxFxytpdcdTAwMGW2XHUwMDE37s/1+1wib21dmkFLXplcdTAwMDPAKVqFku9SyS1am8Ba4KZcblx1MDAwNZZcdTAwMWJcdTAwMDbHXHJcdTAwMWFl4LVWaGzRXHUwMDA2LSBAXG6SXHUwMDE1L+Ry8aOMm3VSu1x1MDAwMa9cYqi3XUN9ZfquttudQZFcdTAwMWM9gZliXHUwMDFkflx08lx1MDAxNL96ml4jJL121oOV+Vx1MDAxN4k+71xcXq2vfbpcXN38sXm/+t1U70/ONstuueBUYLxcdTAwMTdcdTAwMWVcdTAwMTGdXHUwMDAwlOOOt7FcdTAwMThcdTAwMThhiFx1MDAwN5W32tpcdTAwMDKrXHUwMDFmnsfFhlx1MDAwNVdYsrTyJCrealfPxberXHUwMDFmO5tmcX3xcKF2dH9ayUuZjY450n7h5GSl0V9YrHfh/Ntaa1x1MDAwNlS8t//5unHUO/7UNOvd5e5db61yMduW/cKpXHUwMDE4THqZMjJcdTAwMDPpKVx1MDAxNn2Tb1LZ7Vx1MDAxOX0gwCqHmlx1MDAxN09cdTAwMWRGfGuEgKdeXGJ0Rdsz6MB6j8bz3dfC5uz8M0pcdTAwMTktfiMmbjLPdnuNQtl4XHUwMDAyN0XZOE2mWTByxvxLJ1T02V/m69h0XHT/8zvSp8s9WLz7/NBtV27W8PjT1uV3/FJ281XOUGyMPDNcdOhcdTAwMWaa8UVe8kpcdTAwMDLhtFx1MDAwNCGkdChcdTAwMDRGJHv18ZdIUlx1MDAwMNpZXHUwMDE47Fxmx19O4t9L6Sv1rbXNjmqLxnW1XHUwMDAx93q+npcnXzDPI/O4xffcXHUwMDE3XHUwMDFmXG5jetVcdTAwMTVPmNRKY/7kVvJdKrtcdTAwMDX74aRcdTAwMWFe9pTGYaRcdTAwMDFQqoBcdTAwMTd9nFx1MDAwMlxukummXHUwMDE0Zr5GXHUwMDA0wENcdTAwMDNcdTAwMTX51T55pKaO5bGlI3Dx5De8e/rNP1Nz/tPG3J9/t1x1MDAxN3Y+bVx1MDAxNMS9XHUwMDEziCjKvUOB5sblycW7kD1eM4t306uIudrISDVFXHUwMDE43JRX5stas3J/0t24XHUwMDE1+5tLnc/fS5/AQnKbvVx1MDAwMXYyhk1hkVwiXHUwMDBmMlt6XHUwMDA3kbOR9Cbhistg5Z067b3SuqCwtzDaXZxf+37leqeV3cuV8zPnavrbl41XaJDPPO5cdTAwMGKKv8pCu1x1MDAwNtOrP7xBg3aKJaXkm1Ry+/VSXHUwMDA0XHUwMDBliLuk1eQki3G3XHUwMDE5QFx1MDAwNuKxYlx1MDAwZYTy0lx1MDAxN2e/z+Jdr6VSXGLvP/9cXCbancBDJaBdXGbd8Cjtak33XHUwMD
AzpmmqXHUwMDFm3O49fKtcdTAwMWXj2fb5/f1cdTAwMGZ5Wdu5+2HLbrdcdTAwMDAmkNyfXG5cdTAwMTYtMWuke0dBoIlwldRCXHUwMDEy62Jx6SqbmHSO8672hCEzXHUwMDE5KPmKvHt0czxYWV666fRPvmP/Zv/bWnV/51x1MDAxNYqYM4/7gnKwsvCuNan5Kk1cdTAwMDGekCjy95gn36Sy26+BwPEsXFzJdWqI0e5ZTcQruPvIS9TaXHUwMDE0l65SjqfeeFx1MDAwYp78dMNdeHF7TthDwnvU3Dry+1x1MDAxMO+3heWCXGJ3XHUwMDAyXHUwMDAxRVx0d0yQWexcdTAwMWaRwbRSyNSVXiXJYbNEQfkrLJvHXHUwMDA39Ss8Xals7nRcdTAwMTb94dnRSdeVfp5cXEW7QPK8eeGtRzm2XHUwMDE09qvGUkmOcoH3XvIv6lx1MDAxZppcdTAwMDHXsvPDk7TeXHUwMDE51T5syKv+pV//tNPzxydr++v1rfNXqGTOPO5cdTAwMGKKt8pCtShSV3a1XHUwMDEyQ1HyM23iPSq7+SpcZlxmXHUwMDAw0SxxLnnDKuIqa1wiYo6AXHUwMDA1XHUwMDE5rzJcdTAwMWWKc5WfR7WggVx1MDAwYjXefrum/1x1MDAwNqqdQEBvSrUqNaile4Co3Vx1MDAxNK1cZl44vfrwsOLO/eHnjYWDva3Bwm3ZLdU6bpFcdTAwMTXCotGamHQ8XHUwMDE3ZaymV5XmJllcdTAwMTWevjP7cTIjsUaNSCFcdTAwMGb8185L3nC95FvNi3tcdTAwMWXNqvX5qlqr9VdcdTAwMGZcdTAwMDfu7LPunS5ttOE3KnSSIfiPVkpcdTAwMTg/XHUwMDFjnZ0/45t8MctuZd5cdTAwMDdcXKCJXHUwMDBlhLM2REKPdU4mcFKgXHUwMDA1TvvaXHUwMDAyNy+0hszZe81ypO1dXHUwMDE4821cdTAwMWTv3WXDXHUwMDE2+k7JcJqC4/5tmPJmyodcdTAwMTN4XCJebDwuS8FlTTKMrlHnlXf/0mqKXHUwMDA2ge/bXHUwMDFinZvbpYbtK+j2Kra6Z8xR2a1VaVx1MDAxNWgjkFx1MDAxNM/SV0ZcdTAwMTVt+cFAad6shyeKh3uV3yb2VJJcdTAwMGKwhMdy7eo7iVx1MDAxNTfxy8HhQ+vLXHUwMDFh+If95nm/pr9d51x1MDAwZT5f0ImTedxcdTAwMTeUK5eFbcPrXHUwMDEyUZ92WJbufH77Tb5JZbdfK1x1MDAwM4HeK6B4z1x1MDAxOFx1MDAxYqlLXHUwMDFj0i1FhNI6RzFegfb7vOCT96o0xrn336BbhuhzXHUwMDAyXHUwMDAzXHUwMDE1XHUwMDFlfaZcdTAwMGYpTy2DkEJ7ocQ0Sd7sMs+y2qlcdTAwMWFcdTAwMGUhtERgMNxcdTAwMTl53C1WoFx1MDAwMkuXwWjg4UGiODs1hk4klefh8NZcdTAwMTCVJox8g4BYVntNYTBcYqncaO3oXHUwMDE3XHJb5dGIYkYkzmCW+bOMOecs8+z9XCLnxmaZ081Ep1x1MDAxZJJMXHUwMDBlXHUwMDEwRlx1MDAxODH3tCmw5L3eheVcYsXzcvszd1x0zq5lXHUwMDFjl0pcdTAwMTJIeyCUXHUwMDA2Z5z0NkGqmFx1MDAxMO9snHm6lvMjpt+jw31cYv8/Nc5hetui01x1MDAxZXGq/dCzy85LinNcdTAwMDRugVx1MDAwN6RYgjdkhujWZmBcdTAwMDJSPFx1MDAxZcvBPV+6uCSbgcDyoCsnNV10XHUwMDEx2lx1MDAxZvVcdOXizohcdTAwMDatKfbHsm7RUCSs5Vx1MDAwNlx1MDAxMOIxdMDbivAmdUbjqDcoXHUwMDAzP3KB2FTbRFx1MDAwMCjLU/KRXGZLSVx1MDAxN1x1MDAwNzFuqFNIJOVcZqmCIIx996iWpt
SRT79cZsSsy5g8b1CQXHUwMDAwU1TPZFdcdTAwMDeXXHUwMDE0xdDZQFx08sToWmhBXGY+jmJcdTAwMWPrXHUwMDAwXHUwMDAwenCkeVx1MDAxYYurnrE64JmhaK23WoFIXGKqUFx1MDAwN1x1MDAxMoVcdTAwMTCErYbwK+arcbOL4/mkhexGXnJUy73XXHUwMDBiIYoxXHUwMDE0JVx1MDAwYmGkIVx1MDAwYlx1MDAxMz6OapaYTWtcdTAwMDKVR52Q8S70XFwol13/Ooa03OTBXHUwMDAxNYmleKt5jMkk2ZvTPORKWEZBZ989yNFlNqTu5C678CxwfkhF7oMgZ9ogb/QsvZp0tFRcdTAwMDPiR9R0ZoWgdKJUXHUwMDA0tcSdSGFBflx1MDAwNM3ugygpgpLaXHUwMDA2w5VcdTAwMTfjnDZKRur+Pd1IoNhI6eHU8Vx1MDAwMvt2SNSA02LcfGBcYlx1MDAwNUf4OEozS1x1MDAxNlx1MDAxNS15XG5ek8yh5r9fc6VcYlx1MDAwZcir9SWEUJ6pqUbKNHtcYs1ccldcdTAwMWPv0uUjZkRcdTAwMWX+wTPlXehtT06Ze4w9rSBcdTAwMTP3z4PQ7DHhY1x1MDAxMEq46KRFuv1cdTAwMWEoJrFxWMeAXrKCe2BcdTAwMTn/XHUwMDFkvndcdTAwMDStSM2jJujbsNerrFx1MDAxN2Mgqi1BXCI5j1x1MDAwNIjeeaNh8vHSbIhcdTAwMWYx65lcdTAwMTWKgkmNplFx8K5E/qX07GL5koIo145JJNrnLIUnTY5cdTAwMTSGXHUwMDFhXHUwMDExOFwifnJcdTAwMGac0d5cdTAwMTRcdTAwMTdNa1x1MDAxMXhN3lx1MDAwNVlcdJl2SJ9C0XQghzZtSbXAW7AqoTnSWWN8KFb7jWA0N2Rxgk5oXHRyuMkh3/Y4ZJmAwEpcdTAwMDNphiSpKf59XHUwMDFljGZcdTAwMTeGj8f8Qouhd8yLNTynJr4to1xuJI9286ySpCg+7lx1MDAxZb83XHUwMDFjTVX74cfjXG4/K+STLqPtW3K1XHUwMDFl2Pz+Y3Y/T0mhj8LrXHUwMDAwJG+PyGNcdTAwMGZCmfOfyCdcdTAwMDPH3KbpXHUwMDFlkF+mXCJyzVx1MDAxMPmILIdcdTAwMDVEIFx1MDAwNYGX9Vx0ZVxuRI5cdTAwMWPIUnRBLGhUaMvS0fQkyXNV7W9cdTAwMTmD50ZcdTAwMTnO6mliM6mQriVPt8PRtlxyoYiXQjavyN5cZoEjN2M+XHUwMDBm/LJnXHUwMDE1R8WSwE3MXHUwMDFhUVIwMDLPkF/LiSBPYOx5Q0L9X1x1MDAwMH6pmj98Oa70s0I/ZdPzj2CdM1x1MDAxMC6in7j1ZWbrRknRzyH529aiIXRTkjz5XGL6cUuBslo5bSl49sW1XHUwMDE0XHUwMDE45LGt1ikyRVx1MDAwZY+SNlxiI5Ik78B4XHLsiXhcdTAwMTefiuGHxiz977hcXDxccszw/GpScUGRljBcdTAwMThad59cdTAwMWKlICnoIZDkRSqvtHhuXG4ys00hin2ewnVygaQxdFx1MDAxM+PQJ1x1MDAwM7qK5P0g965ZXHLvXHUwMDFm+tK1fvhyXFzhZ1x1MDAwNX0mvVDGXHUwMDEywjrSjPx1MtnNZSVFvlxu0LVHinktfVXn0ES6NFxi7XizNWJcdTAwMDHJfFxc4Mhq7Vx1MDAwMm3pRMpcdTAwMTN8kSRcdCvIXHUwMDE0olx1MDAwNVx1MDAxNsBx3Z1w7LrEXHUwMDFjP8UwQ5r0Oy4p50ZcdTAwMTnBdMbpXHUwMDFjXHUwMDFlwEas50JcdTAwMGKbT1x1MDAxMS/PMvRG0M1AMpNnXHUwMDAyX/bw63HgY1x1MDAxZESK/4B4Vvq4I2pcdTAwMDLFu2B6+skjYuKpzHdcdTAwMDZ7XHUwMDEw0OWn2J
6us+bfMPxp71x1MDAwMtQ8XHUwMDA22lx1MDAxYaVcdTAwMWPYSVx1MDAwN0u1XHUwMDFlfsTtZlZcdTAwMDDqUptRrZNcXM0/RfdNdt11SfFTXCJdeHIjeJNcdTAwMGWQxkWmhvNcZkRAaXmcLypVYMbQ8TxUZ8iaee3SQnLK0HNeXiMn5o20XHUwMDEwn1wiPlxcO3qzIaZviZ65oYpcXDQrkUuuLSjFIGlC/sqT40hmaL3gpVx1MDAxN6XI9J6Hn9lcdTAwMTXGsaCZXHUwMDAyNcdcdTAwMWI4kKdcdTAwMDQ67jm6wFsg7fDSWHYhRVxcqneGoelqP/x4XFzhZ1x1MDAwNnypayXkkVx1MDAwYsnRYm7gy25cZikp8Fx1MDAxMdiRXHUwMDFkgNJKXHUwMDBmNyRcdTAwMWKf78rApyxcdTAwMTB/kVx1MDAwMSBAcVx1MDAxNTtGXHUwMDA1XHUwMDEyyS+w5KPwzrpJsMchOzm5mlx1MDAxOZW4Nr49KJmMVzPpQnx3sDdcdTAwMTXCUFx1MDAxNMzzx0GRrVx1MDAxOVx1MDAwMy5eXHUwMDFm47hiznkkh4ZJ76nGY0rcy1x1MDAxZdNcdTAwMWWRykgveGygNDyRLFx1MDAwZXsmgGHWmjxeiVx1MDAwMlxc/I68M9RL0/nhh2PaPivIU1wiPU9o6Vxc5Lib/HnC7N7zkoKeYb5R3FZcIniv40ie0Hg33JyFXHUwMDFirYUxXHUwMDA1rpJcdTAwMTBUXHUwMDA1PFXNOkGRkFx1MDAwYrfVhmGPbJRcdTAwMTPpXHUwMDE0XHUwMDE1kGdcdTAwMWFcdTAwMGKVXHUwMDFkPcglhbKukUyzpfe0qJdcdTAwMWJfhvBcIoaT/TlcdTAwMTeuUYk4wFiK1Vx1MDAxY7lcdTAwMTmEMTw+Sz0zWM5u3o5cYqV4e/mhLnKWKp65pIiPXuJqL8I9IGx876CXrvP8qETVfUrUS+t4VenjlnjUn0ZcdTAwMDNcIj/orW0sn6w/2NWV47PqYWOhsvh9ffO67KCngctcdTAwMWZcdTAwMDRcdTAwMTmB0d6Ds+OunuSCJKt5nYj9PfpZXHUwMDFj7HE7XHUwMDBi71xuQrClXHRlMVx1MDAwMfZcdTAwMTJmTJDQvF42XHUwMDAznHvZ5u8jXGIuesZEt9eY6/Y6NZL61fd+Tzv3VLMlPvyEgY/Vbnd3QNfyXHQ26eY16z8vyEiOjzfNxu18XFyd/nU2fHAr/dDy2cRcdTAwMWFDxvrnwz//XHUwMDBmNT9QNSJ9 + + + + + FASTQalignmentmark duplicatesbase quality scorerecalibrationvariant sitesgenotype assignmentvariant callingvariant quality score recalibrationannotationinterpretationBAM /CRAMBAM /CRAMVCFVCFanswerVCFpre processing diff --git a/docs/usage/variantcalling/img/sarek_subway.png b/docs/usage/variantcalling/img/sarek_subway.png new file mode 100644 index 0000000000..e2a689b1ca Binary files /dev/null and b/docs/usage/variantcalling/img/sarek_subway.png differ diff --git a/docs/usage/variantcalling/interpretation.md b/docs/usage/variantcalling/interpretation.md new file mode 100644 index 0000000000..0e3b1e3954 --- /dev/null +++ b/docs/usage/variantcalling/interpretation.md 
@@ -0,0 +1,126 @@ +--- +order: 4 +--- + +# Interpretation + +Once variants have been called, the following steps depend on the type of study and the experimental design. +For large population studies, like case-control association analyses, an appropriate large-scale statistical approach will be chosen and different statistical or analytical tools will be used to carry out the tertiary analyses. + +When only a few individuals are involved, and in particular in clinical contexts, the goal will be to interpret the findings in light of different sources of information and pinpoint a causative variant for the investigated phenotype. + +## Overview + +When variants have been called, and a diagnosis is necessary, investigators will need to combine: + +- the predictions resulting from annotations like the one we carried out +- biological and clinical information + +with the goal of narrowing the search space and reducing the number of variants to be inspected. +This approach is summarised in the diagram below: + +![interpretation](./img/interpretation.excalidraw.svg) + +Once the list of variants has been reduced, more in-depth analyses of the reported cases and the genomic region in existing databases might be useful to reach a conclusion. + +## Finding Causative Variants + +Some of these steps might be carried out via software. For this tutorial however, we chose to perform these steps one by one in order to get a better view of the rationale behind this approach. + +We will start by looking at the annotated VCF, which is found at this location in our GitPod environment: + +```bash +cd /workspace/gitpod/training/annotation/haplotypecaller/joint_variant_calling +``` + +Here, we should verify in which order the two samples we used for this analysis have been written in the VCF file. We can do that by grepping the column names row of the file, and printing at screen the fields from 10th onwards, i.e. 
the sample columns: + +```bash +zcat joint_germline_recalibrated_snpEff.ann.vcf.gz | grep "#CHROM" | cut -f 10- +``` + +This returns: + +```bash +case_case control_control +``` + +showing that case variants have been written in the 10th field and control variants in the 11th field. + +Next, in this educational scenario we might assume that an affected individual (case) will carry at least one alternative allele for the causative variant, while the control individual will be homozygous for the reference. +With this assumption in mind, and a bit of one-liner code, we could first filter the variants that are homozygous for the alternative allele in our case, and then the heterozygous ones. + +For the first filter, we can use the following code: + +```bash +zcat joint_germline_recalibrated_snpEff.ann.vcf.gz | grep PASS | grep HIGH | perl -nae 'if($F[10]=~/0\/0/ && $F[9]=~/1\/1/){print $_;}' +``` + +which results in the following variant. + +```bash +chr21 32576780 rs541034925 A AC 332.43 PASS AC=2;AF=0.5;AN=4;DB;DP=94;ExcessHet=0;FS=0;MLEAC=2;MLEAF=0.5;MQ=60;POSITIVE_TRAIN_SITE;QD=33.24;SOR=3.258;VQSLOD=953355.11;culprit=FS;ANN=AC|frameshift_variant|HIGH|TCP10L|ENSG00000242220|transcript|ENST00000300258.8|protein_coding|5/5|c.641dupG|p.Val215fs|745/3805|641/648|214/215||,AC|frameshift_variant|HIGH|CFAP298-TCP10L|ENSG00000265590|transcript|ENST00000673807.1|protein_coding|8/8|c.1163dupG|p.Val389fs|1785/4781|1163/1170|388/389|| GT:AD:DP:GQ:PL 1/1:0,10:10:30:348,30,0 0/0:81,0:81:99:0,119,1600 +``` + +Now we can search for this variant in the [gnomAD database](https://gnomad.broadinstitute.org), which hosts variants and genomic information from sequencing data of almost one million individuals (see [v4 release](https://gnomad.broadinstitute.org/news/2023-11-gnomad-v4-0/)). + +In order to search for the variant we can type its coordinates in the search field and choose the proposed variant corresponding to the exact position we need.
See the figure below: + +![gnomad search](./img/gnomad_search.png) + +The resulting [variant data](https://gnomad.broadinstitute.org/region/21-32576780-32576780?dataset=gnomad_r4) show that our variant is present, and that it's been described already in [ClinVar](https://www.ncbi.nlm.nih.gov/clinvar/), where the provided interpretation (Clinical Significance) is "Benign". + +We can see the resulting table in the following image: + +![gnomad results](./img/gnomad_var_present.png) + +Quite importantly, the gnomAD database allows us to gather more information on the gene this variant occurs in. We can inspect the so-called "constraint data", by clicking on the gene name and inspecting the "constraint" table on the top right of the page. + +![constraint](./img/gnomAD_constraint.png) + +This information gives us a better view of the selective pressure that variation in this gene might be subject to, and therefore informs our understanding of the potential impact of a loss of function variant in this location. + +In this specific case, however, the gene is not under purifying selection, either for loss of function variants (LOEUF 0.89) or for missense ones.
+ +We can continue our analysis by looking at the heterozygous variants in our case, for which the control is homozygous for the reference, with the code: + +```bash +zcat joint_germline_recalibrated_snpEff.ann.vcf.gz | grep PASS | grep HIGH | perl -nae 'if($F[10]=~/0\/0/ && $F[9]=~/0\/1/){print $_;}' +``` + +This will result in the following list of variants: + +```bash +chr21 44339194 rs769070783 T C 57.91 PASS AC=1;AF=0.25;AN=4;BaseQRankSum=-2.373;DB;DP=84;ExcessHet=0;FS=0;MLEAC=1;MLEAF=0.25;MQ=60;MQRankSum=0;POSITIVE_TRAIN_SITE;QD=3.41;ReadPosRankSum=-0.283;SOR=0.859;VQSLOD=198.85;culprit=FS;ANN=C|start_lost|HIGH|CFAP410|ENSG00000160226|transcript|ENST00000397956.7|protein_coding|1/7|c.1A>G|p.Met1?|200/1634|1/1128|1/375||,C|upstream_gene_variant|MODIFIER|ENSG00000232969|ENSG00000232969|transcript|ENST00000426029.1|pseudogene||n.-182T>C|||||182|,C|downstream_gene_variant|MODIFIER|ENSG00000184441|ENSG00000184441|transcript|ENST00000448927.1|pseudogene||n.*3343T>C|||||3343|;LOF=(CFAP410|ENSG00000160226|1|1.00) GT:AD:DP:GQ:PL 0/1:8,9:17:66:66,0,71 0/0:67,0:67:99:0,118,999 +chr21 44406660 rs139273180 C T 35.91 PASS AC=1;AF=0.25;AN=4;BaseQRankSum=-4.294;DB;DP=127;ExcessHet=0;FS=5.057;MLEAC=1;MLEAF=0.25;MQ=60;MQRankSum=0;POSITIVE_TRAIN_SITE;QD=0.51;ReadPosRankSum=0.526;SOR=1.09;VQSLOD=269.00;culprit=FS;ANN=T|stop_gained|HIGH|TRPM2|ENSG00000142185|transcript|ENST00000397932.6|protein_coding|19/33|c.2857C>T|p.Gln953*|2870/5216|2857/4662|953/1553||;LOF=(TRPM2|ENSG00000142185|1|1.00);NMD=(TRPM2|ENSG00000142185|1|1.00) GT:AD:DP:GQ:PL 0/1:48,22:71:44:44,0,950 0/0:51,0:51:99:0,100,899 +chr21 45989090 .
C T 43.91 PASS AC=1;AF=0.25;AN=4;BaseQRankSum=2.65;DP=89;ExcessHet=0;FS=4.359;MLEAC=1;MLEAF=0.25;MQ=60;MQRankSum=0;QD=2.58;ReadPosRankSum=-1.071;SOR=1.863;VQSLOD=240.19;culprit=FS;ANN=T|stop_gained|HIGH|COL6A1|ENSG00000142156|transcript|ENST00000361866.8|protein_coding|9/35|c.811C>T|p.Arg271*|892/4203|811/3087|271/1028||;LOF=(COL6A1|ENSG00000142156|1|1.00);NMD=(COL6A1|ENSG00000142156|1|1.00) GT:AD:DP:GQ:PL 0/1:10,7:18:51:52,0,51 0/0:70,0:70:99:0,120,1800 +``` + +If we search them one by one, we will see that one in particular occurs on a gene (COL6A1) which was previously reported as constrained for loss of function variants in the database version 2.1: + +![col6a1v2](./img/gnomAD_COL6A1_v2.1.png) + +while the version 4.0 of the database, resulting from almost one million samples, reports the gene as _not_ constrained: + +![col6a1v4](./img/gnomAD_COL6A1_v4.0.png) + +We can search for this variant in ClinVar by using an advanced search and limiting our search to both chromosome and base position, as indicated in the figure below: + +![clinvar search](./img/clinvar_search.png) + +This will return two results: one deletion and one single nucleotide variant C>T corresponding to the one we called in the case individual: + +![clinvar results](./img/clinvar_results.png) + +If we click on the nomenclature of the variant we found, we will be able to access the data provided with the submission. In [this page](https://www.ncbi.nlm.nih.gov/clinvar/variation/497373/) we can see that multiple submitters have provided an interpretation for this nonsense mutation (2 stars). +Under the section "Submitted interpretations and evidence" we can gather additional data on the clinical information that led the submitters to classify the variant as "pathogenic".
+ +## Conclusions + +After narrowing down our search and inspecting genomic context and clinical information, we can conclude that the variant + +```bash +chr21 45989090 C T AC=1;AF=0.25;AN=4;BaseQRankSum=2.37;DP=86;ExcessHet=0;FS=0;MLEAC=1;MLEAF=0.25;MQ=60;MQRankSum=0;QD=2.99;ReadPosRankSum=-0.737;SOR=1.022;VQSLOD=9.09;culprit=QD;ANN=T|stop_gained|HIGH|COL6A1|ENSG00000142156|transcript|ENST00000361866.8|protein_coding|9/35|c.811C>T|p.Arg271*|892/4203|811/3087|271/1028||;LOF=(COL6A1|ENSG00000142156|1|1.00);NMD=(COL6A1|ENSG00000142156|1|1.00) GT:AD:DP:GQ:PL 0/1:8,6:15:40:50,0,40 0/0:70,0:70:99:0,112,1494 +``` + +is most likely the causative one, because it creates a premature stop in the COL6A1 gene, with loss of function variants on this gene known to be pathogenic. diff --git a/docs/usage/variantcalling/introduction.md b/docs/usage/variantcalling/introduction.md new file mode 100644 index 0000000000..78e9c79075 --- /dev/null +++ b/docs/usage/variantcalling/introduction.md @@ -0,0 +1,45 @@ +--- +order: 1 +--- + +# Variant Calling Tutorial + +These pages are a tutorial workshop for the [Nextflow](https://www.nextflow.io) pipeline [nf-core/sarek](https://nf-co.re/sarek). + +In this workshop, we will recap the application of next generation sequencing to identify genetic variations in a genome. You will learn how to use the pipeline sarek to carry out this data-intensive workflow efficiently. We will cover topics such as experimental design, configuration of the pipeline and code execution. + +Please note that this is not an introductory workshop, and we will assume some basic familiarity with Nextflow. 
+ +By the end of this workshop, you will be able to: + +- understand the key concepts behind variant calling, as adopted in this pipeline +- analyse simple NGS datasets with the sarek workflow +- customise some of its features for your own variant calling analyses +- integrate different sources of information to identify candidate variants +- make a hypothesis about variant interpretation using the output of sarek + +Let's get started! + +## Running with Gitpod + +In order to run this using GitPod, please make sure: + +1. You have a GitHub account: if not, create one [here](https://github.com/signup) +2. Once you have a GitHub account, sign up for GitPod using your GitHub user [here](https://gitpod.io/login/) choosing "continue with GitHub". + +Now you're all set and can use the following button to launch the service: + +[![Open in GitPod](https://img.shields.io/badge/Gitpod-%20Open%20in%20Gitpod-908a85?logo=gitpod)](https://gitpod.io/#https://github.com/lescai-teaching/sarek-tutorial) + +## Additional documentation + +- You can find detailed documentation on **Nextflow** [here](https://www.nextflow.io/docs/latest/) +- You can find additional training on [these pages](https://training.nextflow.io) + +## Credits & Copyright + +This training material has been written by [Francesco Lescai](https://github.com/lescai) during the [nf-core](https://nf-co.re) Hackathon in Barcelona, 2023. It was originally meant as a contribution for the nf-core community, and aimed at anyone who is interested in using nf-core pipelines for their studies or research activities. + +The Docker image and Gitpod environment used in this repository have been created by [Seqera](https://seqera.io) but have been made open-source ([CC BY-NC-ND](https://creativecommons.org/licenses/by-nc-nd/4.0/)) for the community. 
+ +All examples and descriptions are licensed under the [Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International License](http://creativecommons.org/licenses/by-nc-nd/4.0/). diff --git a/docs/usage/variantcalling/sarek.md b/docs/usage/variantcalling/sarek.md new file mode 100644 index 0000000000..9e071937cd --- /dev/null +++ b/docs/usage/variantcalling/sarek.md @@ -0,0 +1,158 @@ +--- +order: 3 +--- + +# Using Sarek for Variant Calling + +In order to carry out a germline variant calling analysis we will use the nf-core pipeline [sarek](https://nf-co.re/sarek/3.3.2). + +## Overview + +The pipeline is organised following the three main analysis blocks we previously described: pre-processing, variant calling and annotation. + +![sarek_overview](./img/sarek_subway.png) + +In each analysis block, the user can choose among a range of different options in terms of aligners, callers and software to carry out the annotation. +The analysis can also start from different steps, depending on the input available and whether it has been partially processed already. + +## Experimental Design + +In order to choose the different options Sarek offers, the user should collect a few key elements of the experimental design before beginning the analysis. + +### Library design + +If the experiment used a capture (or targeted) strategy, the user will need to make sure the `bed` file with the target regions is available. +This file will be useful if the user wants to limit variant calling and annotation to those regions. +In this case the file can be passed to the Sarek command line using the `--intervals target.bed` parameter. +Should the sequencing strategy be a _whole exome_ or _panel_, the pipeline gives the possibility to enable specific settings for this library type, using the parameter `--wes`. + +### Reference genome + +nf-core pipelines make use of the Illumina iGenomes collection as [reference genomes](https://nf-co.re/docs/usage/reference_genomes).
+Before starting the analysis, the user might want to check whether the genome they need is part of this collection. +They also might want to consider downloading the reference locally, when running on premises: this would be useful for multiple runs and to speed up the analysis. In this case the parameter `--igenomes_base` might be used to pass the root directory of the downloaded references. + +One might also need to use custom files: in this case the user might either provide specific parameters on the command line, or create a config file adding a new section to the `genomes` object. See [here](https://nf-co.re/docs/usage/reference_genomes#custom-genomes) for more details. + +We will follow this specific approach in this tutorial, since the data we will be using have been simulated on chromosome 21 of the Human GRCh38 reference, and we have prepared fasta, indexes and annotation files containing only this chromosome locally. + +### Input files + +The input data should be provided in a CSV file, according to a format that is largely common for nf-core pipelines. +The format is described in the [sarek usage page](https://nf-co.re/sarek/3.3.2/docs/usage#input-sample-sheet-configurations). + +## GATK Best Practices + +During this tutorial we will use the options Sarek offers to follow the [GATK best practices workflow](https://gatk.broadinstitute.org/hc/en-us/articles/360035535932-Germline-short-variant-discovery-SNPs-Indels-). + +This is solely for educational purposes, since the tutorial dataset includes only 2 samples: while joint-genotyping is a valid choice, the use of soft filtering for such a limited dataset will not offer significant improvements. Additionally, running VQSR on a small dataset will run into issues with some annotations and will require limiting this step to fewer parameters than usual.
+ +## Running Sarek Germline + +In the following sections we will first prepare our references, then set our computational resources in order to be able to run the pipeline on a gitpod VM, edit the filtering settings and finally run the pipeline. + +### Reference Genome + +Following the considerations above, we will first of all edit the `nextflow.config` file in our working directory to add a new genome. +It is sufficient to add the following code to the `params` scope in the config. + +```groovy +igenomes_base = '/workspace/gitpod/training/data/refs/' +genomes { + 'GRCh38chr21' { + bwa = "${params.igenomes_base}/sequence/Homo_sapiens_assembly38_chr21.fasta.{amb,ann,bwt,pac,sa}" + dbsnp = "${params.igenomes_base}/annotations/dbsnp_146.hg38_chr21.vcf.gz" + dbsnp_tbi = "${params.igenomes_base}/annotations/dbsnp_146.hg38_chr21.vcf.gz.tbi" + dbsnp_vqsr = '--resource:dbsnp,known=false,training=true,truth=false,prior=2.0 dbsnp_146.hg38_chr21.vcf.gz' + dict = "${params.igenomes_base}/sequence/Homo_sapiens_assembly38_chr21.dict" + fasta = "${params.igenomes_base}/sequence/Homo_sapiens_assembly38_chr21.fasta" + fasta_fai = "${params.igenomes_base}/sequence/Homo_sapiens_assembly38_chr21.fasta.fai" + germline_resource = "${params.igenomes_base}/annotations/gnomAD.r2.1.1.GRCh38.PASS.AC.AF.only_chr21.vcf.gz" + germline_resource_tbi = "${params.igenomes_base}/annotations/gnomAD.r2.1.1.GRCh38.PASS.AC.AF.only_chr21.vcf.gz.tbi" + known_snps = "${params.igenomes_base}/annotations/1000G_phase1.snps.high_confidence.hg38_chr21.vcf.gz" + known_snps_tbi = "${params.igenomes_base}/annotations/1000G_phase1.snps.high_confidence.hg38_chr21.vcf.gz.tbi" + known_snps_vqsr = '--resource:1000G,known=false,training=true,truth=true,prior=10.0 1000G_phase1.snps.high_confidence.hg38_chr21.vcf.gz' + known_indels = "${params.igenomes_base}/annotations/Mills_and_1000G_gold_standard.indels.hg38_chr21.vcf.gz" + known_indels_tbi =
"${params.igenomes_base}/annotations/Mills_and_1000G_gold_standard.indels.hg38_chr21.vcf.gz.tbi" + known_indels_vqsr = '--resource:mills,known=false,training=true,truth=true,prior=10.0 Mills_and_1000G_gold_standard.indels.hg38_chr21.vcf.gz' + snpeff_db = '105' + snpeff_genome = 'GRCh38' + } +} +``` + +### Computing resources + +Based on the choices we made when starting up the gitpod environment, we recommend using the following additional settings. +They can also be added to the config file we just edited. + +```groovy +params { + use_annotation_cache_keys = true +} + +process { + resourceLimits = [ + cpus: 2, + memory: '6.5GB', + time: '2.h' + ] +} + +``` + +The parameter `use_annotation_cache_keys` allows the annotation software to deal with the local paths when the cache is downloaded to the environment. + +### Filtering parameters + +As we mentioned earlier, we will be using the VQSR filtering tool once the variants have been called. +However, this tool should be used to take advantage of a larger amount of variant annotations and improve filtering: when a small tutorial dataset is used, some of the annotations will not have sufficient data or might even have no variance. +In order to account for this, we have to change the filtering options and limit this approach to a subset of variant annotations. + +We can do this by editing the process descriptors for the Sarek modules running VQSR for both single nucleotide variants and insertion/deletions.
+ +```groovy +process { + withName: 'VARIANTRECALIBRATOR_INDEL' { + ext.prefix = { "${meta.id}_INDEL" } + ext.args = "-an QD -an FS -an SOR -an DP -mode INDEL" + publishDir = [ + enabled: false + ] + } + + withName: 'VARIANTRECALIBRATOR_SNP' { + ext.prefix = { "${meta.id}_SNP" } + ext.args = "-an QD -an MQ -an FS -an SOR -mode SNP" + publishDir = [ + enabled: false + ] + } +} +``` + +### Launching the pipeline + +Now we are ready to launch the pipeline, and we can use the following command line: + +```bash +nextflow run nf-core/sarek -r 3.4.0 \ +--input /workspace/gitpod/training/data/reads/sarek-input.csv \ +--outdir . \ +--tools haplotypecaller,snpeff \ +--genome GRCh38chr21 \ +--joint_germline \ +--intervals /workspace/gitpod/training/exome_target_hg38_chr21.bed \ +--wes +``` + +Notice that we have selected `--joint_germline` to enable the joint-genotyping workflow, we have specified our library strategy is using a capture with `--wes` and we have provided a bed file with the targets with `--intervals`. +The target file in this case refers to the capture intervals on chromosome 21 only, where the data have been simulated. + +The whole pipeline from FASTQ input to annotated VCF should run in about 25 minutes. + +Our final VCF file will be located in + +```bash +./annotation/haplotypecaller/joint_variant_calling +``` diff --git a/docs/usage/variantcalling/theory.md b/docs/usage/variantcalling/theory.md new file mode 100644 index 0000000000..e8e8a14b80 --- /dev/null +++ b/docs/usage/variantcalling/theory.md @@ -0,0 +1,140 @@ +--- +order: 1 +--- + +# Calling Variants on Sequencing Data + +Before we dive into one of the nf-core pipelines used for variant calling, it's worth looking at some theoretical aspects of variant calling. + +## Overview + +The term "variant calling" is rooted in the history of DNA sequencing, and it indicates an approach where we identify (i.e. call) positions in a genome (loci) which are variable in a population (genetic variants). 
The specific genotype of an individual at that variant locus is then assigned. + +There are many different approaches for calling variants from sequencing data: here, we will look more specifically at a reference-based variant calling approach, i.e. where a reference genome is needed and variant sites are identified by comparing the reads to this reference. + +Over the years, also thanks to the work carried out by the [GATK team](https://gatk.broadinstitute.org/hc/en-us) at the Broad Institute, there has been a convergence on a "best practices" workflow, which is summarised in the diagram below: + +![overview](./img/overview.excalidraw.svg) + +In this scheme we can identify a few key phases in the workflow. Pre-processing is the first part, where raw data are handled and mapped to a genome reference, to be then transformed in order to increase the accuracy of the following analyses. Then, variant calling is carried out. This is followed by filtering and annotation. +Here we will briefly discuss these key steps, which might vary depending on the specific type of data one is performing variant calling on. + +## Alignment + +The alignment step is where reads obtained from genome fragments of a sample are identified as originating from a specific location in the genome. +This step is essential in a reference-based workflow, because it is the comparison of the raw data with the reference that informs us whether a position in the genome might be variable or not. + +Mismatches, insertions and deletions (INDELs) as well as duplicated regions make this step sometimes challenging: this is the reason why an appropriate aligner has to be chosen, depending on the sequencing application and data type. + +Once each raw read has been aligned to the region of the genome it is most likely originating from, the sequence of all reads overlapping each locus can be used to identify potentially variable sites.
Each read will support the presence of an allele identical to the reference, or a different one (alternative allele), and the variant calling algorithm will measure the weighted support for each allele. + +However, the support given by the raw data to alternative variants might be biased. For this reason, one can apply certain corrections to the data to ensure the support for the alleles is assessed correctly. This is done by performing the two steps described below: marking duplicates, and recalibrating base quality scores. + +## Marking Duplicates + +Duplicates are non-independent measurements of a sequence fragment. + +Since DNA fragmentation is theoretically random, reads originating from different fragments provide independent information. An algorithm can use this information to assess the support for different alleles. +When these measurements however are not independent, the raw data might provide a biased support towards a specific allele. + +Duplicates can be caused by PCR during library preparation (library duplicates) or might occur during sequencing, when the instrument is reading the signal from different clusters (as in Illumina short read sequencing). These latter are called "optical duplicates". + +A specific step called "marking duplicates" identifies these identical pairs using their orientation and 5' position (before any clipping), which will be assumed to be coming from the same input DNA template: one representative pair is then chosen based on quality scores and other criteria, while the other ones are marked. +Marked reads are then ignored in the following steps. + +## Base Quality Score Recalibration + +Among the parameters used by a variant calling algorithm to weigh the support for different alleles, the quality score of the base in the read at the variant locus is quite important. +Sequencing instruments, however, can make systematic errors when reading the signal at each cycle, and cannot account for errors originated in PCR. 
+ +Once a read has been aligned to the reference, an appropriate algorithm can however compare the error rate estimated from the existing base quality scores, with the actual differences observed with the reference sequence (empirical quality), and perform appropriate corrections. +This process is called "base quality score recalibration" (BQSR). + +To calculate empirical qualities, the algorithm simply counts the number of mismatches in the observed bases. Any mismatch which does not overlap a known variant is considered an error. The empirical error rate is simply the ratio between counted errors and the total observed bases. +A Yates correction is applied to this, to avoid either dividing by 0 or dealing with small counts. + +$$ +e_{empirical} = \frac{n_{mismatches} + 1}{n_{bases} +2} +$$ + +The empirical error is expressed as a Quality in Phred-scale: + +$$ +Q_{empirical} = -10 \times log_{10}(e_{empirical}) +$$ + +Let's use a simple example like the one in the diagram below, where for illustrative purposes we only consider the bases belonging to the same read. + +![bqsr](./img/bqsr.excalidraw.svg) + +In this example we have 3 mismatches, but one is a reported variant site: we therefore only count 2 errors, over 10 observed bases. According to the approach we just explained, + +$$ +Q_{empirical} = -10 \times log_{10}(\frac{2 + 1}{10 +2}) = 6.02 +$$ + +To calculate the average reported Q score, we should average the error probabilities and then convert the result back into Phred scale: + +$$ +Q_{average} = -10 \times log_{10}(\frac {0.1 + 0.1 + 0.01 + 0.1 + 0.01 + 0.01 + 0.01 + 0.1 + 0.1 + 0.1}{10}) = 11.94 +$$ + +Our empirical Q score would be 6.02, the average reported Q score is 11.94, and therefore the $\Delta = 11.94 - 6.02 = 5.92$ + +The recalibrated Q score of each base would correspond to the reported Q score minus this $\Delta$.
+ +In a real sequencing dataset, this calculation is performed for different groups (bins) of bases: those in the same lane, those with the same original quality score, per machine cycle, per sequencing context. +In each bin, the difference ($\Delta$) between the average reported quality and the empirical quality is calculated. +The recalibrated score would then be the reported score minus the sum of all deltas calculated in each bin the base belongs to. + +A detailed summary of this approach can be found on the [GATK BQSR page](https://gatk.broadinstitute.org/hc/en-us/articles/360035890531-Base-Quality-Score-Recalibration-BQSR-). We also found this [step-by-step guide](https://rstudio-pubs-static.s3.amazonaws.com/64456_4778547202f24f32b0edc325e96b061a.html) to the mathematical approach quite useful. Full details are explained in the [publication](https://www.nature.com/articles/ng.806) that first proposed this method. + +## Calling Variants + +Once we have prepared the data for an accurate identification of the variants, we are ready to perform the next steps. +The most important innovation introduced some years ago in this part of the workflow has been to separate the identification of a variant site (i.e. variant calling itself) from the assignment of the genotype to each individual. +This approach makes the computation more tractable, especially for large sample cohorts: BAM files are only accessed per-sample in the first step, while multi-sample cohort data are used together in the second step in order to increase the accuracy of genotype assignment. + +### Identifying Variants + +In this phase, which is performed on each sample independently, a first step uses a sliding window to count differences compared to the reference (i.e. mismatches, INDELs) and potentially variable regions are identified. GATK calls these "active regions".
+Then, a local graph assembly of the reads is created to identify plausible haplotypes, which are aligned to the reference with a traditional alignment algorithm called "Smith-Waterman": this is used to identify variants. +For each read in an active region, the support for each of the haplotypes is counted and a likelihood score for each combination of read/haplotype is calculated. +The likelihoods at this step allow the support for each of the alleles in a variant site to be calculated, and read-haplotype likelihoods are a key input for the Bayesian statistics used to determine the most likely genotype. +This first genotype assignment could be sufficient if one analysed a single sample only. + +### Assigning Genotypes + +When multiple samples are analysed, information from each of them could collectively improve the genotype assignment. +This is because the magnitude of potential biases (example: strand bias) can be better estimated, and because the distributions of those annotations used to inform the genotype assignment become more stable when more data are available, by combining multiple samples. +The use of a larger cohort also increases the sensitivity. + +This is possible if the variant calling step is run by producing a variation of the VCF file format called GVCF: in addition to variant sites, this format includes non-variant intervals in the genome of each sample. Moreover, it reports probability likelihoods of a non-reference symbolic allele at these non-variant intervals. +This information allows each sample to be re-genotyped using data from the whole cohort. + +You can read more on the GATK website about the [logic of joint calling](https://gatk.broadinstitute.org/hc/en-us/articles/360035890431-The-logic-of-joint-calling-for-germline-short-variants). + +### Filtering Variants + +There are several ways to spot potential false positives through filtering.
+ +_Hard filtering_ uses pre-defined thresholds of different variant annotations (allele-depth, mapping quality and many others) in order to flag variants passing all these criteria, and those failing to meet any of them. This approach is mostly useful when calling a few samples and not enough data are available for more sophisticated solutions. + +_Soft filtering_ infers the thresholds to be applied from the data themselves. This approach uses the distributions of the annotations, and their overlap with known and validated variants: it defines those combinations of annotations which are more likely to describe true positives (the variants they refer to in the analysis cohort overlap with those validated in other databases). This approach is used by a GATK tool called Variant Quality Score Recalibration (VQSR). + +More details can be found on the [GATK VQSR page](https://gatk.broadinstitute.org/hc/en-us/articles/360035531612-Variant-Quality-Score-Recalibration-VQSR-). + +More recently, pre-trained deep learning models are also available to filter variants based on neural network architectures trained on a large number of variants from population databases. + +## Annotation + +Once the analysis has produced a final VCF file, the final step which is necessary to interpret the results is called "annotation". +This step uses different databases to describe (annotate) each variant from a genomic, biological, or population point of view. +The software used to carry out this task will add information to the VCF file such as: + +- the gene each variant overlaps with +- the transcript the variant overlaps with +- the potential biological consequence on each of those transcripts +- population frequency (minor allele frequency, described in different databases such as gnomAD) + +Several other items are also added, which we can use to interpret our findings from a biological or clinical point of view.
diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index 40ab65f205..0000000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,529 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. -// - -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //=====================================================================// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - 'version', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 
'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //=====================================================================// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' 
- JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - System.exit(1) - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = group_params.get(param).default != null ? 
" [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = 
workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != "" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = 
paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } 
- // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... - // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - 
ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 1714b473e4..0000000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,265 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. 
`-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = workflow.manifest.version - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = 
workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send 
plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? 
'' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? 
'' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.white} ____${colors.reset} - ${colors.white} .´ _ `.${colors.reset} - ${colors.white} / ${colors.green}|\\${colors.reset}`-_ \\${colors.reset} ${colors.blue} __ __ ___ ${colors.reset} - ${colors.white} | ${colors.green}| \\${colors.reset} `-|${colors.reset} ${colors.blue}|__` /\\ |__) |__ |__/${colors.reset} - ${colors.white} \\ ${colors.green}| \\${colors.reset} /${colors.reset} 
${colors.blue}.__| /¯¯\\ | \\ |___ | \\${colors.reset} - ${colors.white} `${colors.green}|${colors.reset}____${colors.green}\\${colors.reset}´${colors.reset} - - ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100755 index 1b88aec0ea..0000000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,40 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." 
- return - } - - // Check that all channels are present - def required_channels = ['conda-forge', 'bioconda', 'defaults'] - def conda_check_failed = !required_channels.every { ch -> ch in channels } - - // Check that they are in the right order - conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) - conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) - - if (conda_check_failed) { - log.warn "=============================================================================\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + - " NB: The order of the channels matters!\n" + - "===================================================================================" - } - } -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index c0760a9da4..0000000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,94 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the nf-core/sarek pipeline -// - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - "* The pipeline\n" + - " https://doi.org/10.12688/f1000research.16665.2\n" + - " https://doi.org/10.5281/zenodo.4468605\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - // - // Print help to screen if required - // - public static String help(workflow, params, log) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.tsv --genome GRCh38 -profile docker" - def help_string = '' - help_string += 
NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Print parameter summary log to screen - // - public static String paramsSummaryLog(workflow, params, log) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params, log) - System.exit(0) - } - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params, log) - - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - - // Check that conda channels are set-up correctly - if (params.enable_conda) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" - System.exit(1) - } - } - - // - // Get attribute from genome config file e.g. 
fasta - // - public static String getGenomeAttribute(params, attribute) { - def val = '' - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - val = params.genomes[ params.genome ][ attribute ] - } - } - return val - } -} diff --git a/lib/WorkflowSarek.groovy b/lib/WorkflowSarek.groovy deleted file mode 100755 index 5b0974ffbd..0000000000 --- a/lib/WorkflowSarek.groovy +++ /dev/null @@ -1,59 +0,0 @@ -// -// This file holds several functions specific to the workflow/sarek.nf in the nf-core/sarek pipeline -// - -class WorkflowSarek { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - genomeExistsError(params, log) - - if (!params.fasta) { - log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - System.exit(1) - } - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

$group

\n" - summary_section += "
\n" - for (param in group_params.keySet()) { - summary_section += "
$param
${group_params.get(param) ?: 'N/A'}
\n" - } - summary_section += "
\n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - // - // Exit pipeline if incorrect --genome key provided - // - private static void genomeExistsError(params, log) { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - log.error "=============================================================================\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "===================================================================================" - System.exit(1) - } - } -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb5e4..0000000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf old mode 100644 new mode 100755 index 8e6e3df37c..96dd0c078a --- a/main.nf +++ b/main.nf @@ -1,89 +1,409 @@ #!/usr/bin/env nextflow /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nf-core/sarek -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Started March 2016. Ported to nf-core May 2019. Ported to DSL 2 July 2020. 
----------------------------------------------------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nf-core/sarek: An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing ----------------------------------------------------------------------------------------- - @Website - https://nf-co.re/sarek ----------------------------------------------------------------------------------------- - @Documentation - https://nf-co.re/sarek/usage ----------------------------------------------------------------------------------------- - @Github - https://github.com/nf-core/sarek ----------------------------------------------------------------------------------------- - @Slack - https://nfcore.slack.com/channels/sarek ----------------------------------------------------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Github : https://github.com/nf-core/sarek + Website: https://nf-co.re/sarek + Docs : https://nf-co.re/sarek/usage + Slack : https://nfcore.slack.com/channels/sarek +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -nextflow.enable.dsl = 2 - /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GENOME PARAMETER VALUES -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - -params.ac_loci = WorkflowMain.getGenomeAttribute(params, 'ac_loci') -params.ac_loci_gc = WorkflowMain.getGenomeAttribute(params, 'ac_loci_gc') -params.bwa = WorkflowMain.getGenomeAttribute(params, 'bwa') -params.chr_dir = WorkflowMain.getGenomeAttribute(params, 'chr_dir') -params.chr_length 
= WorkflowMain.getGenomeAttribute(params, 'chr_length') -params.dbsnp = WorkflowMain.getGenomeAttribute(params, 'dbsnp') -params.dbsnp_tbi = WorkflowMain.getGenomeAttribute(params, 'dbsnp_tbi') -params.dict = WorkflowMain.getGenomeAttribute(params, 'dict') -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') -params.fasta_fai = WorkflowMain.getGenomeAttribute(params, 'fasta_fai') -params.germline_resource = WorkflowMain.getGenomeAttribute(params, 'germline_resource') -params.germline_resource_tbi = WorkflowMain.getGenomeAttribute(params, 'germline_resource_tbi') -params.intervals = WorkflowMain.getGenomeAttribute(params, 'intervals') -params.known_indels = WorkflowMain.getGenomeAttribute(params, 'known_indels') -params.known_indels_tbi = WorkflowMain.getGenomeAttribute(params, 'known_indels_tbi') -params.mappability = WorkflowMain.getGenomeAttribute(params, 'mappability') -params.snpeff_db = WorkflowMain.getGenomeAttribute(params, 'snpeff_db') -params.vep_cache_version = WorkflowMain.getGenomeAttribute(params, 'vep_cache_version') -params.vep_genome = WorkflowMain.getGenomeAttribute(params, 'vep_genome') -params.vep_species = WorkflowMain.getGenomeAttribute(params, 'vep_species') +params.ascat_alleles = getGenomeAttribute('ascat_alleles') +params.ascat_genome = getGenomeAttribute('ascat_genome') +params.ascat_loci = getGenomeAttribute('ascat_loci') +params.ascat_loci_gc = getGenomeAttribute('ascat_loci_gc') +params.ascat_loci_rt = getGenomeAttribute('ascat_loci_rt') +params.bwa = getGenomeAttribute('bwa') +params.bwamem2 = getGenomeAttribute('bwamem2') +params.cf_chrom_len = getGenomeAttribute('cf_chrom_len') +params.chr_dir = getGenomeAttribute('chr_dir') +params.dbsnp = getGenomeAttribute('dbsnp') +params.dbsnp_tbi = getGenomeAttribute('dbsnp_tbi') +params.dbsnp_vqsr = getGenomeAttribute('dbsnp_vqsr') +params.dict = getGenomeAttribute('dict') +params.dragmap = getGenomeAttribute('dragmap') +params.fasta = getGenomeAttribute('fasta') +params.fasta_fai 
= getGenomeAttribute('fasta_fai') +params.germline_resource = getGenomeAttribute('germline_resource') +params.germline_resource_tbi = getGenomeAttribute('germline_resource_tbi') +params.intervals = getGenomeAttribute('intervals') +params.known_indels = getGenomeAttribute('known_indels') +params.known_indels_tbi = getGenomeAttribute('known_indels_tbi') +params.known_indels_vqsr = getGenomeAttribute('known_indels_vqsr') +params.known_snps = getGenomeAttribute('known_snps') +params.known_snps_tbi = getGenomeAttribute('known_snps_tbi') +params.known_snps_vqsr = getGenomeAttribute('known_snps_vqsr') +params.mappability = getGenomeAttribute('mappability') +params.msisensor2_models = getGenomeAttribute('msisensor2_models') +params.msisensorpro_scan = getGenomeAttribute('msisensorpro_scan') +params.ngscheckmate_bed = getGenomeAttribute('ngscheckmate_bed') +params.pon = getGenomeAttribute('pon') +params.pon_tbi = getGenomeAttribute('pon_tbi') +params.sentieon_dnascope_model = getGenomeAttribute('sentieon_dnascope_model') +params.snpeff_db = getGenomeAttribute('snpeff_db') +params.vep_cache_version = getGenomeAttribute('vep_cache_version') +params.vep_genome = getGenomeAttribute('vep_genome') +params.vep_species = getGenomeAttribute('vep_species') /* -======================================================================================== - VALIDATE & PRINT PARAMETER SUMMARY -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -WorkflowMain.initialise(workflow, params, log) +include { SAREK } from './workflows/sarek' +include { ANNOTATION_CACHE_INITIALISATION } from './subworkflows/local/annotation_cache_initialisation' +include { DOWNLOAD_CACHE_SNPEFF_VEP } from 
'./subworkflows/local/download_cache_snpeff_vep' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_sarek_pipeline' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_sarek_pipeline' +include { PREPARE_GENOME } from './subworkflows/local/prepare_genome' +include { PREPARE_INTERVALS } from './subworkflows/local/prepare_intervals' +include { PREPARE_REFERENCE_CNVKIT } from './subworkflows/local/prepare_reference_cnvkit' +include { PREPARE_SNPSIFT_DATABASES } from './subworkflows/local/prepare_snpsift_databases' +include { samplesheetToList } from 'plugin/nf-schema' /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NAMED WORKFLOW FOR PIPELINE -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { SAREK } from './workflows/sarek' - // WORKFLOW: Run main nf-core/sarek analysis pipeline workflow NFCORE_SAREK { - SAREK () -} + take: + samplesheet + + main: + versions = Channel.empty() + + // build indexes if needed + PREPARE_GENOME( + params.ascat_alleles, + params.ascat_loci, + params.ascat_loci_gc, + params.ascat_loci_rt, + params.bbsplit_fasta_list, + params.bbsplit_index, + params.bcftools_annotations, + params.bcftools_annotations_tbi, + params.bwa, + params.bwamem2, + params.chr_dir, + params.dbsnp, + params.dbsnp_tbi, + params.dict, + params.dragmap, + params.fasta, + params.fasta_fai, + params.germline_resource, + params.germline_resource_tbi, + params.known_indels, + params.known_indels_tbi, + params.known_snps, + params.known_snps_tbi, + params.msisensor2_models, + params.msisensorpro_scan, + params.pon, + params.pon_tbi, + params.aligner, + params.step, + params.tools ?: 'no_tools', + params.vep_include_fasta, + ) + + // Build intervals if needed 
+ PREPARE_INTERVALS(PREPARE_GENOME.out.fasta_fai, params.intervals, params.no_intervals, params.nucleotides_per_second, params.outdir, params.step) + + // Intervals for speed up preprocessing/variant calling by spread/gather + // [interval.bed] all intervals in one file + intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined + intervals_bed_gz_tbi_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_gz_tbi_combined + intervals_bed_combined_for_variant_calling = PREPARE_INTERVALS.out.intervals_bed_combined + + // For QC during preprocessing, we don't need any intervals (MOSDEPTH doesn't take them for WGS) + intervals_for_preprocessing = params.wes + ? intervals_bed_combined.map { it -> [[id: it.baseName], it] }.collect() + : Channel.value([[id: 'null'], []]) + // [ interval, num_intervals ] multiple interval.bed files, divided by useful intervals for scatter/gather + intervals = PREPARE_INTERVALS.out.intervals_bed + // [ interval_bed, tbi, num_intervals ] multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather + intervals_bed_gz_tbi = PREPARE_INTERVALS.out.intervals_bed_gz_tbi + + intervals_and_num_intervals = intervals.map { file, num_intervals -> + [num_intervals < 1 ? [] : file, num_intervals] + } + + intervals_bed_gz_tbi_and_num_intervals = intervals_bed_gz_tbi.map { file, num_intervals -> + [num_intervals < 1 ? [] : file[0], num_intervals < 1 ? 
[] : file[1], num_intervals] + } + + if (params.tools && params.tools.split(',').contains('cnvkit')) { + if (params.cnvkit_reference) { + cnvkit_reference = Channel.fromPath(params.cnvkit_reference).collect() + } + else { + PREPARE_REFERENCE_CNVKIT(PREPARE_GENOME.out.fasta, intervals_bed_combined) + cnvkit_reference = PREPARE_REFERENCE_CNVKIT.out.cnvkit_reference + versions = versions.mix(PREPARE_REFERENCE_CNVKIT.out.versions) + } + } + else { + cnvkit_reference = Channel.value([]) + } + // Gather used softwares versions + versions = versions.mix(PREPARE_GENOME.out.versions) + versions = versions.mix(PREPARE_INTERVALS.out.versions) + + // Fails when consensus calling is specified without normalization + if (params.snv_consensus_calling && !params.normalize_vcfs) { + error("Consensus calling was specified without normalization. Set --normalize_vcfs in addition. See: https://www.biostars.org/p/307035/") + } + + + // Download cache + if (params.download_cache) { + // Assuming that even if the cache is provided, if the user specify download_cache, sarek will download the cache + ensemblvep_info = Channel.of([[id: "${params.vep_cache_version}_${params.vep_genome}"], params.vep_genome, params.vep_species, params.vep_cache_version]) + snpeff_info = Channel.of([[id: "${params.snpeff_db}"], params.snpeff_db]) + DOWNLOAD_CACHE_SNPEFF_VEP(ensemblvep_info, snpeff_info) + snpeff_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.snpeff_cache + vep_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.ensemblvep_cache.map { _meta, cache -> [cache] } + } + else { + // Looks for cache information either locally or on the cloud + ANNOTATION_CACHE_INITIALISATION( + (params.snpeff_cache && params.tools && (params.tools.split(',').contains("snpeff") || params.tools.split(',').contains('merge'))), + params.snpeff_cache, + params.snpeff_db, + (params.vep_cache && params.tools && (params.tools.split(',').contains("vep") || params.tools.split(',').contains('merge'))), + params.vep_cache, + params.vep_species, + 
params.vep_cache_version, + params.vep_genome, + params.vep_custom_args, + "Please refer to https://nf-co.re/sarek/usage#how-to-customise-snpeff-and-vep-annotation for more information.", + ) + + snpeff_cache = ANNOTATION_CACHE_INITIALISATION.out.snpeff_cache + vep_cache = ANNOTATION_CACHE_INITIALISATION.out.ensemblvep_cache + } + + vep_extra_files = [] + + if (params.dbnsfp && params.dbnsfp_tbi) { + vep_extra_files.add(file(params.dbnsfp, checkIfExists: true)) + vep_extra_files.add(file(params.dbnsfp_tbi, checkIfExists: true)) + } + else if (params.dbnsfp && !params.dbnsfp_tbi) { + System.err.println("DBNSFP: ${params.dbnsfp} has been provided with `--dbnsfp, but no dbnsfp_tbi has") + System.err.println("cf: https://nf-co.re/sarek/parameters#dbnsfp") + error("Execution halted due to dbnsfp inconsistency.") + } + + if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && params.spliceai_indel_tbi) { + vep_extra_files.add(file(params.spliceai_indel, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_indel_tbi, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_snv, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_snv_tbi, checkIfExists: true)) + } + + if (params.phenotypes_file) { + vep_extra_files.add(file(params.phenotypes_file, checkIfExists: true)) + if (params.phenotypes_file_tbi) { + vep_extra_files.add(file(params.phenotypes_file_tbi, checkIfExists: true)) + } + } + // Build SnpSift annotation databases configuration from CSV samplesheet + // CSV format: vcf,tbi,fields,prefix,vardb + // - vcf: Path to annotation VCF (required) + // - tbi: Path to tabix index (optional, defaults to ${vcf}.tbi) + // - fields: Semicolon-separated INFO fields to extract (optional) + // - prefix: Prefix for annotated field names (optional) + // - vardb: Path to pre-built .snpsift.vardb directory (optional) + snpsift_db_configs = [] + + if (params.snpsift_databases) { + // Parse and validate CSV using nf-schema + // 
Returns list of tuples: [vcf, tbi, fields, prefix, vardb] + def db_list = samplesheetToList(params.snpsift_databases, "${projectDir}/assets/schema_snpsift_databases.json") + + db_list.each { vcf, tbi, fields, prefix, vardb -> + // Fields are required when vardb is not provided (needed to build the database) + if (!vardb && !fields) { + error("SnpSift database '${vcf}': 'fields' column is required when 'vardb' is not provided (needed for database creation)") + } + + def vcf_file = file(vcf, checkIfExists: true) + def tbi_file = tbi ? file(tbi, checkIfExists: true) : file("${vcf}.tbi", checkIfExists: true) + def vardb_file = vardb ? file(vardb, checkIfExists: true) : null + + snpsift_db_configs.add([ + vcf: vcf_file, + tbi: tbi_file, + fields: fields ?: '', + prefix: prefix ?: '', + vardb: vardb_file + ]) + } + } + + // Prepare SnpSift databases (build if vardb not provided, returns tuple for SNPSIFT_ANNMEM) + ch_snpsift_db = Channel.value([[], [], [], [], []]) + if (params.tools && params.tools.split(',').contains('snpsift') && snpsift_db_configs) { + PREPARE_SNPSIFT_DATABASES(snpsift_db_configs) + ch_snpsift_db = PREPARE_SNPSIFT_DATABASES.out.db_tuple + } + + // + // WORKFLOW: Run pipeline + // + SAREK( + samplesheet, + params.aligner, + params.skip_tools ?: 'no_tools', + params.step, + params.tools ?: 'no_tools', + PREPARE_GENOME.out.ascat_alleles, + PREPARE_GENOME.out.ascat_loci, + PREPARE_GENOME.out.ascat_loci_gc, + PREPARE_GENOME.out.ascat_loci_rt, + PREPARE_GENOME.out.bbsplit_index, + PREPARE_GENOME.out.bcftools_annotations, + PREPARE_GENOME.out.bcftools_annotations_tbi, + params.bcftools_columns ? Channel.fromPath(params.bcftools_columns).collect() : Channel.value([]), + params.bcftools_header_lines ? Channel.fromPath(params.bcftools_header_lines).collect() : Channel.empty(), + params.cf_chrom_len ? 
Channel.fromPath(params.cf_chrom_len).collect() : [], + PREPARE_GENOME.out.chr_dir, + cnvkit_reference, + PREPARE_GENOME.out.dbsnp, + PREPARE_GENOME.out.dbsnp_tbi, + params.dbsnp_vqsr ? Channel.value(params.dbsnp_vqsr) : Channel.empty(), + PREPARE_GENOME.out.dict, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fasta_fai, + PREPARE_GENOME.out.germline_resource, + PREPARE_GENOME.out.germline_resource_tbi, + PREPARE_GENOME.out.index_alignment, + intervals_and_num_intervals, + intervals_bed_combined, + intervals_bed_combined_for_variant_calling, + intervals_bed_gz_tbi_and_num_intervals, + intervals_bed_gz_tbi_combined, + intervals_for_preprocessing, + params.known_indels_vqsr ? Channel.value(params.known_indels_vqsr) : Channel.empty(), + PREPARE_GENOME.out.known_sites_indels, + PREPARE_GENOME.out.known_sites_indels_tbi, + PREPARE_GENOME.out.known_sites_snps, + PREPARE_GENOME.out.known_sites_snps_tbi, + params.known_snps_vqsr ? Channel.value(params.known_snps_vqsr) : Channel.empty(), + params.mappability ? Channel.fromPath(params.mappability).collect() : Channel.value([]), + PREPARE_GENOME.out.msisensor2_models, + PREPARE_GENOME.out.msisensorpro_scan, + params.ngscheckmate_bed ? Channel.value(params.ngscheckmate_bed) : Channel.empty(), + PREPARE_GENOME.out.pon, + PREPARE_GENOME.out.pon_tbi, + params.sentieon_dnascope_model ? Channel.fromPath(params.sentieon_dnascope_model).collect() : Channel.value([]), + params.varlociraptor_scenario_germline ? Channel.fromPath(params.varlociraptor_scenario_germline).map { it -> [[id: it.baseName - '.yte'], it] }.collect() : Channel.fromPath("${projectDir}/assets/varlociraptor_germline.yte.yaml").collect(), + params.varlociraptor_scenario_somatic ? Channel.fromPath(params.varlociraptor_scenario_somatic).map { it -> [[id: it.baseName - '.yte'], it] }.collect() : Channel.fromPath("${projectDir}/assets/varlociraptor_somatic.yte.yaml").collect(), + params.varlociraptor_scenario_tumor_only ? 
Channel.fromPath(params.varlociraptor_scenario_tumor_only).map { it -> [[id: it.baseName - '.yte'], it] }.collect() : Channel.fromPath("${projectDir}/assets/varlociraptor_tumor_only.yte.yaml").collect(), + snpeff_cache, + params.snpeff_db, + vep_cache, + params.vep_cache_version, + vep_extra_files, + PREPARE_GENOME.out.vep_fasta, + params.vep_genome, + params.vep_species, + ch_snpsift_db, + versions, + ) + + emit: + multiqc_publish = SAREK.out.multiqc_publish + multiqc_report = SAREK.out.multiqc_report // channel: /path/to/multiqc_report.html +} /* -======================================================================================== - RUN ALL WORKFLOWS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 workflow { - NFCORE_SAREK () + + main: + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION( + params.version, + params.validate_params, + args, + params.outdir, + params.input, + params.help, + params.help_full, + params.show_hidden, + ) + + // + // WORKFLOW: Run main workflow + // + NFCORE_SAREK(PIPELINE_INITIALISATION.out.samplesheet) + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + NFCORE_SAREK.out.multiqc_report, + ) + + publish: + multiqc = NFCORE_SAREK.out.multiqc_publish +} + +output { + multiqc { + path "multiqc" + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Return one attribute 
(e.g. fasta) of the selected genome from params.genomes, or null when it is not defined +// + +def getGenomeAttribute(attribute) { + // A genome must be selected and have an entry in params.genomes + def genomeIsKnown = params.genomes && params.genome && params.genomes.containsKey(params.genome) + if (!genomeIsKnown) { + return null + } + return params.genomes[params.genome].containsKey(attribute) ? params.genomes[params.genome][attribute] : null } diff --git a/modules.json b/modules.json index f4c6cfec9b..4f2b636aa0 100644 --- a/modules.json +++ b/modules.json @@ -2,169 +2,636 @@ "name": "nf-core/sarek", "homePage": "https://github.com/nf-core/sarek", "repos": { - "nf-core/modules": { - "bcftools/stats": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "bwa/index": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "bwa/mem": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "bwamem2/index": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "bwamem2/mem": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "cat/fastq": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "cnvkit/batch": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "custom/dumpsoftwareversions": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "deepvariant": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "ensemblvep": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "fastqc": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "fgbio/callmolecularconsensusreads": { - "git_sha": "9d0cad583b9a71a6509b754fdf589cbfbed08961" - }, - "fgbio/fastqtobam": { - "git_sha": "e3285528aca2733ff2d544cb5e5fcc34599226f3" - }, - "fgbio/groupreadsbyumi": { - "git_sha": "9d0cad583b9a71a6509b754fdf589cbfbed08961" - }, - "freebayes": { - "git_sha": "9d0cad583b9a71a6509b754fdf589cbfbed08961" - }, - "gatk4/applybqsr": { - "git_sha": 
"e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "gatk4/applyvqsr": { - "git_sha": "598d7abdb2a8df1aa3471c48d9186a9e3465983f" - }, - "gatk4/baserecalibrator": { - "git_sha": 
"e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "gatk4/calculatecontamination": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "gatk4/createsequencedictionary": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "gatk4/estimatelibrarycomplexity": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "gatk4/filtermutectcalls": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "gatk4/gatherbqsrreports": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "gatk4/genomicsdbimport": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "gatk4/genotypegvcfs": { - "git_sha": "598d7abdb2a8df1aa3471c48d9186a9e3465983f" - }, - "gatk4/getpileupsummaries": { - "git_sha": "f5d5926516d2319c1af83fb4b33834cc4461ce62" - }, - "gatk4/haplotypecaller": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "gatk4/learnreadorientationmodel": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "gatk4/markduplicates": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "gatk4/mutect2": { - "git_sha": "f5d5926516d2319c1af83fb4b33834cc4461ce62" - }, - "gatk4/variantrecalibrator": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "manta/germline": { - "git_sha": "9d0cad583b9a71a6509b754fdf589cbfbed08961" - }, - "manta/somatic": { - "git_sha": "9d0cad583b9a71a6509b754fdf589cbfbed08961" - }, - "manta/tumoronly": { - "git_sha": "9d0cad583b9a71a6509b754fdf589cbfbed08961" - }, - "msisensorpro/msi_somatic": { - "git_sha": "c8ebd0de36c649a14fc92f2f73cbd9f691a8ce0a" - }, - "msisensorpro/scan": { - "git_sha": "c8ebd0de36c649a14fc92f2f73cbd9f691a8ce0a" - }, - "multiqc": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "samblaster": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "samtools/bam2fq": { - "git_sha": "e751e5040af57e1b4e06ed4e0f3efe6de25c1683" - }, - "samtools/faidx": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" 
- }, - "samtools/index": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "samtools/merge": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "samtools/mpileup": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "samtools/stats": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "samtools/view": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "seqkit/split2": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "snpeff": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "strelka/germline": { - "git_sha": "f5d5926516d2319c1af83fb4b33834cc4461ce62" - }, - "strelka/somatic": { - "git_sha": "f5d5926516d2319c1af83fb4b33834cc4461ce62" - }, - "tabix/bgziptabix": { - "git_sha": "e22966ce74340cb671576143e5fdbbd71670cffa" - }, - "tabix/tabix": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "tiddit/sv": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "trimgalore": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "vcftools": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "ascat": { + "branch": "master", + "git_sha": "edd5dd7ef8d99cc059dcdff2de08d936838b8ca4", + "installed_by": ["modules"] + }, + "bbmap/bbsplit": { + "branch": "master", + "git_sha": "6d1d4b987af3ee5852d852abf99f8883028cea9c", + "installed_by": ["modules"] + }, + "bcftools/annotate": { + "branch": "master", + "git_sha": "28438d38e50710e20885ff2594344b9d5a03dec7", + "installed_by": ["modules"] + }, + "bcftools/concat": { + "branch": "master", + "git_sha": "1503efe8f6450e71218097f93cf43e4b625018d4", + "installed_by": ["modules"] + }, + "bcftools/isec": { + "branch": "master", + "git_sha": "f17049e03697726ace7499d2fe342f892594f6f3", + "installed_by": ["modules"] + }, + "bcftools/merge": { + "branch": "master", + "git_sha": "f17049e03697726ace7499d2fe342f892594f6f3", + 
"installed_by": ["modules"] + }, + "bcftools/mpileup": { + "branch": "master", + "git_sha": "c9c3ef86c1892413b3c86fb38c4e39fd7288512f", + "installed_by": ["bam_ngscheckmate"] + }, + "bcftools/norm": { + "branch": "master", + "git_sha": "39fed2e840a805454a64dda9c2ef64c00e2c6781", + "installed_by": ["modules"] + }, + "bcftools/sort": { + "branch": "master", + "git_sha": "c9c3ef86c1892413b3c86fb38c4e39fd7288512f", + "installed_by": ["modules"] + }, + "bcftools/stats": { + "branch": "master", + "git_sha": "c9c3ef86c1892413b3c86fb38c4e39fd7288512f", + "installed_by": ["modules"] + }, + "bcftools/view": { + "branch": "master", + "git_sha": "f17049e03697726ace7499d2fe342f892594f6f3", + "installed_by": ["modules"] + }, + "bwa/index": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "bwa/mem": { + "branch": "master", + "git_sha": "a29f18660f5e3748d44d6f716241e70c942c065d", + "installed_by": ["modules"] + }, + "bwamem2/index": { + "branch": "master", + "git_sha": "b2902040b9cb9b7b32b62400f1c024a709bd4812", + "installed_by": ["modules"] + }, + "bwamem2/mem": { + "branch": "master", + "git_sha": "a29f18660f5e3748d44d6f716241e70c942c065d", + "installed_by": ["modules"] + }, + "cat/cat": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "cat/fastq": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "cnvkit/antitarget": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "cnvkit/batch": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "cnvkit/call": { + "branch": "master", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] + }, + "cnvkit/export": { + "branch": "master", + "git_sha": 
"81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] + }, + "cnvkit/genemetrics": { + "branch": "master", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] + }, + "cnvkit/reference": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "controlfreec/assesssignificance": { + "branch": "master", + "git_sha": "b9cec8b07fca8372fd97e46b9f82f1cca4a12dc5", + "installed_by": ["modules"], + "patch": "modules/nf-core/controlfreec/assesssignificance/controlfreec-assesssignificance.diff" + }, + "controlfreec/freec": { + "branch": "master", + "git_sha": "b9cec8b07fca8372fd97e46b9f82f1cca4a12dc5", + "installed_by": ["modules"] + }, + "controlfreec/freec2bed": { + "branch": "master", + "git_sha": "b9cec8b07fca8372fd97e46b9f82f1cca4a12dc5", + "installed_by": ["modules"] + }, + "controlfreec/freec2circos": { + "branch": "master", + "git_sha": "b9cec8b07fca8372fd97e46b9f82f1cca4a12dc5", + "installed_by": ["modules"] + }, + "controlfreec/makegraph2": { + "branch": "master", + "git_sha": "b9cec8b07fca8372fd97e46b9f82f1cca4a12dc5", + "installed_by": ["modules"] + }, + "deepvariant/rundeepvariant": { + "branch": "master", + "git_sha": "dd78204eb24332ec3cdd512fa9a83898f9728842", + "installed_by": ["modules"] + }, + "dragmap/align": { + "branch": "master", + "git_sha": "8b06d86f6a82b6203f239ad409f606fdf71ec697", + "installed_by": ["modules"] + }, + "dragmap/hashtable": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "ensemblvep/download": { + "branch": "master", + "git_sha": "09ea5d9fcd6eda714f148e0f3b6a4920b2091f41", + "installed_by": ["modules"] + }, + "ensemblvep/vep": { + "branch": "master", + "git_sha": "691bb6956ec56154b81ba5771641ca24443c4d17", + "installed_by": ["modules"] + }, + "fastp": { + "branch": "master", + "git_sha": "d082103d7976a2804f21225446cc110cbd822f4c", + "installed_by": 
["modules"] + }, + "fastqc": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "fgbio/callmolecularconsensusreads": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "fgbio/copyumifromreadname": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "fgbio/fastqtobam": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "fgbio/groupreadsbyumi": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "freebayes": { + "branch": "master", + "git_sha": "d04951ee68e3e8b875ebf5ddb7ba6e05233624c1", + "installed_by": ["modules"] + }, + "gatk4/applybqsr": { + "branch": "master", + "git_sha": "620300d96c4b9051d533a5ea5f9d3b2c64b781ce", + "installed_by": ["modules"] + }, + "gatk4/applyvqsr": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "gatk4/baserecalibrator": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "gatk4/calculatecontamination": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "gatk4/cnnscorevariants": { + "branch": "master", + "git_sha": "1999eff2c530b2b185a25cc42117a1686f09b685", + "installed_by": ["modules"] + }, + "gatk4/createsequencedictionary": { + "branch": "master", + "git_sha": "a57253204b8f4022edfeaa3ae2f5e2abecd8858b", + "installed_by": ["modules"] + }, + "gatk4/estimatelibrarycomplexity": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "gatk4/filtermutectcalls": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + 
"installed_by": ["modules"] + }, + "gatk4/filtervarianttranches": { + "branch": "master", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] + }, + "gatk4/gatherbqsrreports": { + "branch": "master", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] + }, + "gatk4/gatherpileupsummaries": { + "branch": "master", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] + }, + "gatk4/genomicsdbimport": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "gatk4/genotypegvcfs": { + "branch": "master", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] + }, + "gatk4/getpileupsummaries": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "gatk4/haplotypecaller": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "gatk4/intervallisttobed": { + "branch": "master", + "git_sha": "20fe8646005253d57a7a8db42abf69ea0966dc75", + "installed_by": ["modules"] + }, + "gatk4/learnreadorientationmodel": { + "branch": "master", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] + }, + "gatk4/markduplicates": { + "branch": "master", + "git_sha": "1ec937ab3edc307bc0d79a2200d784e9f0868359", + "installed_by": ["modules"] + }, + "gatk4/mergemutectstats": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "gatk4/mergevcfs": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "gatk4/mutect2": { + "branch": "master", + "git_sha": "a97cba262e9367734e435dc07d2e3b7d6121ef3e", + "installed_by": ["modules"] + }, + "gatk4/variantrecalibrator": { + "branch": "master", + "git_sha": 
"81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] + }, + "gatk4spark/applybqsr": { + "branch": "master", + "git_sha": "620300d96c4b9051d533a5ea5f9d3b2c64b781ce", + "installed_by": ["modules"] + }, + "gatk4spark/baserecalibrator": { + "branch": "master", + "git_sha": "fa55ebb81654fe1736975fa28d1af5a079bf6a08", + "installed_by": ["modules"] + }, + "gatk4spark/markduplicates": { + "branch": "master", + "git_sha": "fa55ebb81654fe1736975fa28d1af5a079bf6a08", + "installed_by": ["modules"] + }, + "gawk": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "goleft/indexcov": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "gunzip": { + "branch": "master", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "installed_by": ["modules"] + }, + "lofreq/callparallel": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "manta/germline": { + "branch": "master", + "git_sha": "ae6b18e8930fe66595c8b08633b484d42970a564", + "installed_by": ["modules"] + }, + "manta/somatic": { + "branch": "master", + "git_sha": "ae6b18e8930fe66595c8b08633b484d42970a564", + "installed_by": ["modules"] + }, + "manta/tumoronly": { + "branch": "master", + "git_sha": "ae6b18e8930fe66595c8b08633b484d42970a564", + "installed_by": ["modules"] + }, + "mosdepth": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "msisensor2/msi": { + "branch": "master", + "git_sha": "d96d6f176de5729d21c3b33a610b486f7ef7eac9", + "installed_by": ["modules"] + }, + "msisensorpro/msisomatic": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "msisensorpro/scan": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": 
["modules"] + }, + "multiqc": { + "branch": "master", + "git_sha": "9656d955b700a8707c4a67821ab056f8c1095675", + "installed_by": ["modules"] + }, + "muse/call": { + "branch": "master", + "git_sha": "2ae2df92d6f5ac2d54058d0cece928e13d241912", + "installed_by": ["modules"] + }, + "muse/sump": { + "branch": "master", + "git_sha": "2ae2df92d6f5ac2d54058d0cece928e13d241912", + "installed_by": ["modules"] + }, + "ngscheckmate/ncm": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["bam_ngscheckmate"] + }, + "parabricks/fq2bam": { + "branch": "master", + "git_sha": "f0b0dc3c3f001b1b7bf814a3c353baecbab17ea1", + "installed_by": ["modules"] + }, + "rbt/vcfsplit": { + "branch": "master", + "git_sha": "117bda90be326bcff5cb2286c7988a4caa588b5d", + "installed_by": ["modules"] + }, + "samtools/bam2fq": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "samtools/collatefastq": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "samtools/convert": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "samtools/faidx": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "samtools/merge": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "samtools/mpileup": { + "branch": "master", + "git_sha": "7e20d971c70d78dbd9f610698267f37b7fb3d38a", + "installed_by": ["modules"] + }, + "samtools/stats": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "samtools/view": { + "branch": "master", + "git_sha": 
"05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "sentieon/applyvarcal": { + "branch": "master", + "git_sha": "256557155b2439c5d551f1a668bf6a7ff6b10f93", + "installed_by": ["modules"] + }, + "sentieon/bwamem": { + "branch": "master", + "git_sha": "256557155b2439c5d551f1a668bf6a7ff6b10f93", + "installed_by": ["modules"] + }, + "sentieon/dedup": { + "branch": "master", + "git_sha": "256557155b2439c5d551f1a668bf6a7ff6b10f93", + "installed_by": ["modules"] + }, + "sentieon/dnamodelapply": { + "branch": "master", + "git_sha": "256557155b2439c5d551f1a668bf6a7ff6b10f93", + "installed_by": ["modules"] + }, + "sentieon/dnascope": { + "branch": "master", + "git_sha": "256557155b2439c5d551f1a668bf6a7ff6b10f93", + "installed_by": ["modules"] + }, + "sentieon/gvcftyper": { + "branch": "master", + "git_sha": "256557155b2439c5d551f1a668bf6a7ff6b10f93", + "installed_by": ["modules"] + }, + "sentieon/haplotyper": { + "branch": "master", + "git_sha": "256557155b2439c5d551f1a668bf6a7ff6b10f93", + "installed_by": ["modules"] + }, + "sentieon/tnscope": { + "branch": "master", + "git_sha": "256557155b2439c5d551f1a668bf6a7ff6b10f93", + "installed_by": ["modules"] + }, + "sentieon/varcal": { + "branch": "master", + "git_sha": "256557155b2439c5d551f1a668bf6a7ff6b10f93", + "installed_by": ["modules"] + }, + "snpeff/download": { + "branch": "master", + "git_sha": "ab50ae0a7c0714bf8b13918c1b7af732002c244d", + "installed_by": ["modules"] + }, + "snpeff/snpeff": { + "branch": "master", + "git_sha": "51d2d20973eca80048a9490339e8b421db122908", + "installed_by": ["modules", "vcf_annotate_snpeff"] + }, + "snpsift/annmem": { + "branch": "master", + "git_sha": "e12534f75a4504b67984412122698c1cf1966e6f", + "installed_by": ["modules"] + }, + "snpsift/annmemcreate": { + "branch": "master", + "git_sha": "e12534f75a4504b67984412122698c1cf1966e6f", + "installed_by": ["modules"] + }, + "spring/decompress": { + "branch": "master", + "git_sha": 
"05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "strelka/germline": { + "branch": "master", + "git_sha": "039730fab3f0150585ad46c402c6bf95396d88b5", + "installed_by": ["modules"] + }, + "strelka/somatic": { + "branch": "master", + "git_sha": "039730fab3f0150585ad46c402c6bf95396d88b5", + "installed_by": ["modules"] + }, + "svdb/merge": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "tabix/bgziptabix": { + "branch": "master", + "git_sha": "91a902fb32d6717da38a9694eb4ad3fade53a8db", + "installed_by": ["modules", "vcf_annotate_snpeff"] + }, + "tabix/tabix": { + "branch": "master", + "git_sha": "f2cfcf9d3f6a2d123e6c44aefa788aa232204a7a", + "installed_by": ["modules"] + }, + "tiddit/sv": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "untar": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "unzip": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "varlociraptor/callvariants": { + "branch": "master", + "git_sha": "5ae3c74e09c529c5c768aa89397e490ba4728219", + "installed_by": ["modules"] + }, + "varlociraptor/estimatealignmentproperties": { + "branch": "master", + "git_sha": "5ae3c74e09c529c5c768aa89397e490ba4728219", + "installed_by": ["modules"] + }, + "varlociraptor/preprocess": { + "branch": "master", + "git_sha": "5ae3c74e09c529c5c768aa89397e490ba4728219", + "installed_by": ["modules"] + }, + "vcflib/vcffilter": { + "branch": "master", + "git_sha": "401ec2b2b8d0938d12ae4f9e25819e14596b8f83", + "installed_by": ["modules"] + }, + "vcftools": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, + "yte": { + "branch": "master", + "git_sha": "c734c101f0010ec13ed9bbf578477962c32f9cc3", + "installed_by": 
["modules"] + } + } + }, + "subworkflows": { + "nf-core": { + "bam_ngscheckmate": { + "branch": "master", + "git_sha": "c9c3ef86c1892413b3c86fb38c4e39fd7288512f", + "installed_by": ["subworkflows"] + }, + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "f0b535b3ae20080f8db03dd5388876ad1ec29d70", + "installed_by": ["subworkflows"] + }, + "utils_nfschema_plugin": { + "branch": "master", + "git_sha": "fdc08b8b1ae74f56686ce21f7ea11ad11990ce57", + "installed_by": ["subworkflows"] + }, + "vcf_annotate_snpeff": { + "branch": "master", + "git_sha": "23004c9c64013c90b7d835621ef4cdeff19a1427", + "installed_by": ["subworkflows"] + } + } } } } -} \ No newline at end of file +} diff --git a/modules/local/add_info_to_vcf/environment.yml b/modules/local/add_info_to_vcf/environment.yml new file mode 100644 index 0000000000..315f6dc67e --- /dev/null +++ b/modules/local/add_info_to_vcf/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::gawk=5.3.0 diff --git a/modules/local/add_info_to_vcf/main.nf b/modules/local/add_info_to_vcf/main.nf new file mode 100644 index 0000000000..466c33f15b --- /dev/null +++ b/modules/local/add_info_to_vcf/main.nf @@ -0,0 +1,43 @@ +// Decompress a gzipped VCF and tag every variant record with INFO SOURCE=<input file name>, +// declaring the new key in the header; the result is written uncompressed as *.added_info.vcf +process ADD_INFO_TO_VCF { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/gawk:5.3.0' + : 'biocontainers/gawk:5.3.0'}" + + input: + tuple val(meta), path(vcf_gz) + + output: + tuple val(meta), path("*.added_info.vcf"), emit: vcf + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + input="input.vcf" + output="${vcf_gz.baseName.minus(".vcf")}.added_info.vcf" + zcat ${vcf_gz} > \$input + ## Add info header lines + grep -E "^##" \$input > \$output + ## Add description of new INFO value (structured line, so the SOURCE key used below is declared) + echo '##INFO=<ID=SOURCE,Number=1,Type=String,Description="Name of the VCF file this record was taken from">' >> \$output + ## Add column header + grep -E "^#CHROM" \$input >> \$output + ## Add SOURCE value to INFO column of variant calls (-q: only test that records exist, do not print them) + if grep -qEv "^#" \$input; then + grep -Ev "^#" \$input | awk 'BEGIN{FS=OFS="\t"} { \$8=="." ? \$8="SOURCE=${vcf_gz}" : \$8=\$8";SOURCE=${vcf_gz}"; print }' >> \$output + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ +} diff --git a/modules/local/bgzip.nf b/modules/local/bgzip.nf deleted file mode 100644 index 6e312ea4dd..0000000000 --- a/modules/local/bgzip.nf +++ /dev/null @@ -1,29 +0,0 @@ - -process BGZIP { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::tabix=1.11" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : - 'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }" - - input: - tuple val(meta), path(vcf_gz) - - output: - tuple val(meta), path("*.vcf"), emit: vcf - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - bgzip -d $vcf_gz -@${task.cpus} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/build_intervals/main.nf b/modules/local/build_intervals/main.nf deleted file mode 100644 index 3198bea9ed..0000000000 --- a/modules/local/build_intervals/main.nf +++ /dev/null @@ -1,20 +0,0 @@ -process BUILD_INTERVALS { - tag "$fasta_fai" - label 'process_medium' - - conda (params.enable_conda ? "anaconda::gawk=5.1.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : - 'quay.io/biocontainers/gawk:5.1.0' }" - - input: - path fasta_fai - - output: - path "*.bed", emit: bed - - script: - """ - awk -v FS='\t' -v OFS='\t' '{ print \$1, \"0\", \$2 }' ${fasta_fai} > ${fasta_fai.baseName}.bed - """ -} diff --git a/modules/local/concat_vcf/main.nf b/modules/local/concat_vcf/main.nf deleted file mode 100644 index 26cee2d4b0..0000000000 --- a/modules/local/concat_vcf/main.nf +++ /dev/null @@ -1,32 +0,0 @@ -process CONCAT_VCF { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::bcftools=1.14" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bcftools:1.14--hde04aa1_1' : - 'quay.io/biocontainers/bcftools:1.14--hde04aa1_1' }" - - input: - tuple val(meta), path(vcf) - path fasta_fai - path target_bed - - output: - tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf - tuple val(meta), path("${prefix}.vcf.gz.tbi"), emit: tbi - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def target_options = target_bed ? "-t ${target_bed}" : "" - - """ - concatenateVCFs.sh -i ${fasta_fai} -c ${task.cpus} -o ${prefix}.vcf ${target_options} $args - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/consensus_from_sites/environment.yml b/modules/local/consensus_from_sites/environment.yml new file mode 100644 index 0000000000..5f9310b448 --- /dev/null +++ b/modules/local/consensus_from_sites/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::gawk=5.3.0 + - bioconda::htslib=1.21 diff --git a/modules/local/consensus_from_sites/main.nf b/modules/local/consensus_from_sites/main.nf new file mode 100644 index 0000000000..510cf045e7 --- /dev/null +++ b/modules/local/consensus_from_sites/main.nf @@ -0,0 +1,62 @@ +// Create consensus VCF from bcftools isec sites.txt output +// Simpler alternative to merging numbered VCFs - only captures presence/absence per caller + +process CONSENSUS_FROM_SITES { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(isec_dir) + // Expects meta.callers = ['caller1', 'caller2', ...] in same order as isec input + + output: + tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.tbi"), emit: tbi + tuple val("${task.process}"), val('gawk'), eval("awk --version | head -n1 | sed 's/GNU Awk //; s/, .*//'"), emit: versions_gawk, topic: versions + tuple val("${task.process}"), val('htslib'), eval("tabix --version | head -n1 | sed 's/tabix (htslib) //'"), emit: versions_htslib, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def callers = meta.callers.join(',') + """ + awk -v callers="${callers}" ' + BEGIN { + OFS="\\t" + n=split(callers, c, ",") + print "##fileformat=VCFv4.2" + print "##INFO=<ID=CALLERS,Number=.,Type=String,Description=\\"Callers that reported this site\\">" + print "##INFO=<ID=NCALLERS,Number=1,Type=Integer,Description=\\"Number of callers that reported this site\\">" + print "#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO" + } + { + # sites.txt columns are used as: 1=CHROM, 2=POS, 3=REF, 4=ALT, 5=presence flags (one 0/1 character per caller) + split(\$5, bits, "") + caller_list=""; count=0 + for (i=1; i<=n; i++) { + if (bits[i]=="1") { + caller_list = caller_list (caller_list?",":"") c[i] + count++ + } + } + print \$1, \$2, ".", \$3, \$4, ".", ".", "CALLERS="caller_list";NCALLERS="count + }' ${isec_dir}/sites.txt | bgzip -c > ${prefix}.vcf.gz + + tabix -p vcf ${prefix}.vcf.gz + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + """ +} diff --git 
a/modules/local/create_intervals_bed/environment.yml b/modules/local/create_intervals_bed/environment.yml new file mode 100644 index 0000000000..315f6dc67e --- /dev/null +++ b/modules/local/create_intervals_bed/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::gawk=5.3.0 diff 
--git a/modules/local/create_intervals_bed/main.nf b/modules/local/create_intervals_bed/main.nf index 736abcf875..3ffe9a7428 100644 --- a/modules/local/create_intervals_bed/main.nf +++ b/modules/local/create_intervals_bed/main.nf @@ -1,30 +1,34 @@ process CREATE_INTERVALS_BED { - tag "$intervals" - label 'process_medium' + tag "${intervals}" + label 'process_single' - conda (params.enable_conda ? "anaconda::gawk=5.1.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : - 'quay.io/biocontainers/gawk:5.1.0' }" + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/gawk:5.3.0' + : 'biocontainers/gawk:5.3.0'}" input: path intervals + val nucleotides_per_second output: - path ("*.bed"), emit: bed - //TODO version number missing + path ("*.bed"), emit: bed + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when script: // If intervals file is in BED format, // Fifth column is interpreted to contain runtime estimates // Which is then used to combine short-running jobs - if (intervals.toString().toLowerCase().endsWith("bed")) + if (intervals.toString().toLowerCase().endsWith("bed")) { """ awk -vFS="\t" '{ t = \$5 # runtime estimate if (t == "") { # no runtime estimate in this row, assume default value - t = (\$3 - \$2) / ${params.nucleotides_per_second} + t = (\$3 - \$2) / ${nucleotides_per_second} } if (name == "" || (chunk > 600 && (chunk + t) > longest * 1.05)) { # start a new chunk @@ -37,19 +41,48 @@ process CREATE_INTERVALS_BED { chunk += t print \$0 > name }' ${intervals} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS """ - else if 
(intervals.toString().toLowerCase().endsWith("interval_list")) + } + else if (intervals.toString().toLowerCase().endsWith("interval_list")) { """ grep -v '^@' ${intervals} | awk -vFS="\t" '{ name = sprintf("%s_%d-%d", \$1, \$2, \$3); printf("%s\\t%d\\t%d\\n", \$1, \$2-1, \$3) > name ".bed" }' + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS """ - else + } + else { """ awk -vFS="[:-]" '{ name = sprintf("%s_%d-%d", \$1, \$2, \$3); printf("%s\\t%d\\t%d\\n", \$1, \$2-1, \$3) > name ".bed" }' ${intervals} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS """ + } + + stub: + def prefix = task.ext.prefix ?: "${intervals.baseName}" + """ + touch ${prefix}.stub.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ } diff --git a/modules/local/deeptools/bamcoverage.nf b/modules/local/deeptools/bamcoverage.nf deleted file mode 100644 index 8e362b0511..0000000000 --- a/modules/local/deeptools/bamcoverage.nf +++ /dev/null @@ -1,33 +0,0 @@ -process DEEPTOOLS_BAMCOVERAGE { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::deeptools=3.5.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0': - 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" - - input: - tuple val(meta), path(bam), path(bai) - - output: - tuple val(meta), path("*.bigWig"), emit: bigwig - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - """ - bamCoverage \ - --bam $bam \ - $args \ - --numberOfProcessors ${task.cpus} \ - --outFileName ${prefix}.bigWig - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - deeptools: \$(echo \$(deeptools --version 2>&1) | sed 's/^.*deeptools //; s/Using.*\$//' )) - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/applybqsrspark/main.nf b/modules/local/gatk4/applybqsrspark/main.nf deleted file mode 100644 index 24c42e3bc9..0000000000 --- a/modules/local/gatk4/applybqsrspark/main.nf +++ /dev/null @@ -1,45 +0,0 @@ -process GATK4_APPLYBQSR_SPARK { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - input: - tuple val(meta), path(cram), path(crai), path(bqsr_table), path(intervals_bed) - path fasta - path fasta_fai - path dict - - output: - tuple val(meta), path("*.cram"), emit: cram - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK ApplyBQSRSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def intervals_command = intervals_bed ? 
"-L ${intervals_bed}" : "" - """ - gatk ApplyBQSRSpark \\ - -R $fasta \\ - -I $cram \\ - --bqsr-recal-file $bqsr_table \\ - $intervals_command \\ - --tmp-dir . \ - -O ${prefix}.cram \\ - $args \ - --spark-master local[${task.cpus}] - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/applybqsrspark/meta.yml b/modules/local/gatk4/applybqsrspark/meta.yml deleted file mode 100644 index 23a16647b7..0000000000 --- a/modules/local/gatk4/applybqsrspark/meta.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: gatk4_applybqsr -description: Apply base quality score recalibration (BQSR) to a bam file -keywords: - - bqsr - - bam -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM file from alignment - pattern: "*.{bam}" - - bqsr_table: - type: file - description: Recalibration table from gatk4_baserecalibrator - - fasta: - type: file - description: The reference fasta file - - fastaidx: - type: file - description: Index of reference fasta file - - dict: - type: file - description: GATK sequence dictionary - - intervalsBed: - type: file - description: Bed file with the genomic regions included in the library (optional) - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - bam: - type: file - description: Recalibrated BAM file - pattern: "*.{bam}" - -authors: - - "@yocra3" - - "@FriederikeHanssen" diff --git a/modules/local/gatk4/baserecalibratorspark/main.nf b/modules/local/gatk4/baserecalibratorspark/main.nf deleted file mode 100644 index baafb7c35b..0000000000 --- a/modules/local/gatk4/baserecalibratorspark/main.nf +++ /dev/null @@ -1,48 +0,0 @@ -process GATK4_BASERECALIBRATOR_SPARK { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - input: - tuple val(meta), path(cram), path(crai), path(intervals_bed) - path fasta - path fasta_fai - path dict - path known_sites - path known_sites_tbi - - output: - tuple val(meta), path("*.table"), emit: table - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK BaseRecalibratorSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" - def sites_command = known_sites.collect{"--known-sites ${it}"}.join(' ') - """ - gatk BaseRecalibratorSpark \ - -R $fasta \ - -I $cram \ - $sites_command \ - $intervals_command \ - --tmp-dir . 
\ - $args \ - -O ${prefix}.table \ - --spark-master local[${task.cpus}] - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/baserecalibratorspark/meta.yml b/modules/local/gatk4/baserecalibratorspark/meta.yml deleted file mode 100644 index 22e5f4a284..0000000000 --- a/modules/local/gatk4/baserecalibratorspark/meta.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: gatk4_baserecalibrator -description: Generate recalibration table for Base Quality Score Recalibration (BQSR) -keywords: - - sort -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM file from alignment - pattern: "*.{bam}" - - fasta: - type: file - description: The reference fasta file - - fastaidx: - type: file - description: Index of reference fasta file - - dict: - type: file - description: GATK sequence dictionary - - intervalsBed: - type: file - description: Bed file with the genomic regions included in the library (optional) - - knownSites: - type: file - description: Bed file with the genomic regions included in the library (optional) - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - table: - type: file - description: Recalibration table from BaseRecalibrator - pattern: "*.{table}" - -authors: - - "@yocra3" - - "@FriederikeHanssen" diff --git a/modules/local/gatk4/gatherpileupsummaries.nf b/modules/local/gatk4/gatherpileupsummaries.nf deleted file mode 100644 index c90a7b0601..0000000000 --- a/modules/local/gatk4/gatherpileupsummaries.nf +++ /dev/null @@ -1,43 +0,0 @@ - -process GATK4_GATHERPILEUPSUMMARIES { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - - - input: - tuple val(meta), path(pileup) - path dict - - output: - tuple val(meta), path("*.table"), emit: table - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def input = pileup.collect{ "-I ${it} " }.join(' ') - - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK GatherPileupSummaries] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
- } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" \ - GatherPileupSummaries \ - --sequence-dictionary ${dict} \ - ${input} \ - -O ${prefix}.pileupsummaries.table - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/intervallisttobed.nf b/modules/local/gatk4/intervallisttobed.nf deleted file mode 100644 index 7cca8b480b..0000000000 --- a/modules/local/gatk4/intervallisttobed.nf +++ /dev/null @@ -1,38 +0,0 @@ -process GATK4_INTERVALLISTTOBED { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - - input: - tuple val(meta), path(interval) - - output: - tuple val(meta), path("*.bed"), emit: bed - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK IntervalListToBed] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
- } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" IntervalListToBed \\ - --INPUT ${interval} \\ - --OUTPUT ${meta.id}.bed \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/markduplicatesspark/main.nf b/modules/local/gatk4/markduplicatesspark/main.nf deleted file mode 100644 index d1b78a049a..0000000000 --- a/modules/local/gatk4/markduplicatesspark/main.nf +++ /dev/null @@ -1,46 +0,0 @@ -process GATK4_MARKDUPLICATES_SPARK { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - - input: - tuple val(meta), path(bam) - path fasta - path fasta_fai - path dict - val format //either "bam" or "cram" - - output: - tuple val(meta), path("*.${format}"), emit: output - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def bams = bam.collect(){ x -> "-I ".concat(x.toString()) }.join(" ") - """ - gatk \ - MarkDuplicatesSpark \ - $bams \ - -O ${prefix}.${format} \ - --reference ${fasta} \ - --tmp-dir . 
\ - --spark-master local[${task.cpus}] \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/markduplicatesspark/meta.yml b/modules/local/gatk4/markduplicatesspark/meta.yml deleted file mode 100644 index abe61e023f..0000000000 --- a/modules/local/gatk4/markduplicatesspark/meta.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: gatk4_markduplicates -description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. -keywords: - - markduplicates - - bam - - sort -tools: - - gatk4: - description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- - tool_dev_url: https://github.com/broadinstitute/gatk - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['BSD-3-clause'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: Sorted BAM file - pattern: "*.{bam}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - bam: - type: file - description: Marked duplicates BAM file - pattern: "*.{bam}" - - metrics: - type: file - description: Duplicate metrics file generated by GATK - pattern: "*.{metrics.txt}" - -authors: - - "@ajodeh-juma" diff --git a/modules/local/gatk4/mergemutectstats.nf b/modules/local/gatk4/mergemutectstats.nf deleted file mode 100644 index b8e4bd1e46..0000000000 --- a/modules/local/gatk4/mergemutectstats.nf +++ /dev/null @@ -1,40 +0,0 @@ -process GATK4_MERGEMUTECTSTATS { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - - input: - tuple val(meta), path(stats) - - - output: - tuple val(meta), path("*.stats"), emit: stats - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def input = stats.collect{ " -stats ${it} "}.join() - - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK MergeMutectStats] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
- } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" MergeMutectStats \\ - ${input} \\ - -output ${meta.id}.vcf.gz.stats \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/index_target_bed/main.nf b/modules/local/index_target_bed/main.nf deleted file mode 100644 index 2b25abe19c..0000000000 --- a/modules/local/index_target_bed/main.nf +++ /dev/null @@ -1,22 +0,0 @@ -process INDEX_TARGET_BED { - tag "$target_bed" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::bcftools=1.12" : null) - //TODO: No singularity container at the moment, use docker container for the moment - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'quay.io/biocontainers/bcftools:1.12--h45bccc9_1' : - 'quay.io/biocontainers/bcftools:1.12--h45bccc9_1' }" - - input: - path target_bed - - output: - tuple path("*.gz"), path("*.gz.tbi"), emit: gz_tbi - - script: - """ - bgzip --threads ${task.cpus} -c ${target_bed} > ${target_bed}.gz - tabix ${target_bed}.gz - """ -} diff --git a/modules/local/msisensorpro/msi/main.nf b/modules/local/msisensorpro/msi/main.nf deleted file mode 100644 index 2567854fbb..0000000000 --- a/modules/local/msisensorpro/msi/main.nf +++ /dev/null @@ -1,43 +0,0 @@ -process MSISENSORPRO_MSI { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? "bioconda::msisensor-pro=1.1.a" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/msisensor-pro:1.1.a--hb3646a4_0' : - 'quay.io/biocontainers/msisensor-pro:1.1.a--hb3646a4_0' }" - - input: - tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor) - path msisensorpro_scan - - output: - tuple val(meta), path("${prefix}.list") , emit: output - tuple val(meta), path("${prefix}_dis.list") , emit: output_dis - tuple val(meta), path("${prefix}_germline.list"), emit: output_germline - tuple val(meta), path("${prefix}_somatic.list") , emit: output_somatic - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - """ - msisensor-pro msi \\ - -d $msisensorpro_scan \\ - -n $bam_normal \\ - -t $bam_tumor \\ - -o $prefix \\ - -b $task.cpus \\ - $args - - mv ${prefix} ${prefix}.list - mv ${prefix}_dis ${prefix}_dis.list - mv ${prefix}_germline ${prefix}_germline.list - mv ${prefix}_somatic ${prefix}_somatic.list - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - msisensor-pro: \$(msisensor-pro 2>&1 | sed -nE 's/Version:\\sv([0-9]\\.[0-9])/\\1/ p') - END_VERSIONS - """ -} diff --git a/modules/local/msisensorpro/msi/meta.yml b/modules/local/msisensorpro/msi/meta.yml deleted file mode 100644 index e1d2220c67..0000000000 --- a/modules/local/msisensorpro/msi/meta.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: msisensorpro_msi - -description: evaluate MSI using paired tumor-normal sequencing data -keywords: - - microsatellite -tools: - - msisensor: - description: MSIsensor-pro is an updated version of msisensor - homepage: https://github.com/xjtu-omics/msisensor-pro - documentation: None - tool_dev_url: None - doi: "doi.org/10.1016/j.gpb.2020.02.001" - licence: ['free for non-commercial use by academic, government, and non-profit/not-for-profit institutions'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - bam_normal: - type: file - description: | - BAM file from normal tissue - - bai_normal: - type: file - description: | - Index for the BAM file from normal tissue - - bam_tumor: - type: file - description: | - BAM file from tumor tissue - - bai_tumor: - type: file - description: | - Index for the BAM file from tumor tissue - - msisensor_scan: - type: file - description: | - Output file from msisensor-pro scan module - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - list: - type: file - description: | - msisensor-pro final report and associated files (dis, germline, somatic) - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - -authors: - - "@maxulysse" diff --git a/modules/local/msisensorpro/scan/main.nf b/modules/local/msisensorpro/scan/main.nf deleted file mode 100644 index 54377039fb..0000000000 --- a/modules/local/msisensorpro/scan/main.nf +++ /dev/null @@ -1,31 +0,0 @@ -process MSISENSORPRO_SCAN { - tag "$fasta" - label 'process_low' - - conda (params.enable_conda ? "bioconda::msisensor-pro=1.1.a" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/msisensor-pro:1.1.a--hb3646a4_0' : - 'quay.io/biocontainers/msisensor-pro:1.1.a--hb3646a4_0' }" - - input: - path fasta - - output: - tuple val(meta), path("*.list"), emit: list - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.suffix ? 
"${meta.id}${task.ext.suffix}" : "${meta.id}" - """ - msisensor-pro scan \\ - -d $fasta \\ - -o ${fasta.baseName}.list \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - msisensor-pro: \$(msisensor-pro 2>&1 | sed -nE 's/Version:\\sv([0-9]\\.[0-9])/\\1/ p') - END_VERSIONS - """ -} diff --git a/modules/local/msisensorpro/scan/meta.yml b/modules/local/msisensorpro/scan/meta.yml deleted file mode 100644 index 5e9261aadb..0000000000 --- a/modules/local/msisensorpro/scan/meta.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: msisensorpro_scan - -description: Scan the reference genome to get microsatellites information -keywords: - - microsatellite -tools: - - msisensor: - description: MSIsensor-pro is an updated version of msisensor - homepage: https://github.com/xjtu-omics/msisensor-pro - documentation: None - tool_dev_url: None - doi: "doi.org/10.1016/j.gpb.2020.02.001" - licence: ['free for non-commercial use by academic, government, and non-profit/not-for-profit institutions'] - -input: - - fasta: - type: file - description: FASTA file - pattern: "*.{fa,fasta}" - -output: - - list: - type: file - description: MSIsensor-pro scan output file of minisatellites - pattern: "*.list" - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - -authors: - - "@maxulysse" diff --git a/modules/local/qualimap/bamqc/main.nf b/modules/local/qualimap/bamqc/main.nf deleted file mode 100644 index af3d869529..0000000000 --- a/modules/local/qualimap/bamqc/main.nf +++ /dev/null @@ -1,50 +0,0 @@ -process QUALIMAP_BAMQC { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::qualimap=2.2.2d" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/qualimap:2.2.2d--1' : - 'quay.io/biocontainers/qualimap:2.2.2d--1' }" - - input: - tuple val(meta), path(bam), path(index) - path gff - - output: - tuple val(meta), path("${prefix}"), emit: results - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def collect_pairs = meta.single_end ? '' : '--collect-overlap-pairs' - def memory = task.memory.toGiga() + "G" - def regions = gff ? "--gff $gff" : '' - def strandedness = 'non-strand-specific' - if (meta.strandedness == 'forward') { - strandedness = 'strand-specific-forward' - } else if (meta.strandedness == 'reverse') { - strandedness = 'strand-specific-reverse' - } - """ - unset DISPLAY - mkdir tmp - export _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp - qualimap \\ - --java-mem-size=$memory \\ - bamqc \\ - $args \\ - -bam $bam \\ - $regions \\ - -p $strandedness \\ - $collect_pairs \\ - -outdir $prefix \\ - -nt $task.cpus - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - qualimap: \$(echo \$(qualimap 2>&1) | sed 's/^.*QualiMap v.//; s/Built.*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/qualimap/bamqc/meta.yml b/modules/local/qualimap/bamqc/meta.yml deleted file mode 100644 index 6888d30e8d..0000000000 --- a/modules/local/qualimap/bamqc/meta.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: qualimap_bamqc -description: Evaluate alignment data -keywords: - - quality control - - qc - - bam -tools: - - qualimap: - description: | - Qualimap 2 is a platform-independent application written in - Java and R that provides both a Graphical User Interface and - a command-line interface to facilitate the quality control of - alignment sequencing data and its derivatives like feature counts. 
- homepage: http://qualimap.bioinfo.cipf.es/ - documentation: http://qualimap.conesalab.org/doc_html/index.html - doi: 10.1093/bioinformatics/bts503 - licence: ['GPL-2.0-only'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM file - pattern: "*.{bam}" - - gff: - type: file - description: Feature file with regions of interest - pattern: "*.{gff,gtf,bed}" - - use_gff: - type: boolean - description: Specifies if feature file should be used or not -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - results: - type: dir - description: Qualimap results dir - pattern: "*/*" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@phue" diff --git a/modules/local/qualimap/bamqccram/main.nf b/modules/local/qualimap/bamqccram/main.nf deleted file mode 100644 index 58a175db34..0000000000 --- a/modules/local/qualimap/bamqccram/main.nf +++ /dev/null @@ -1,54 +0,0 @@ -process QUALIMAP_BAMQC_CRAM { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::qualimap=2.2.2d bioconda::samtools=1.12" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-d3934ca6bb4e61334891ffa2e9a4c87a530e3188:4bf11d12f2c3eccf1eb585097c0b6fd31c18c418-0' : - 'quay.io/biocontainers/mulled-v2-d3934ca6bb4e61334891ffa2e9a4c87a530e3188:4bf11d12f2c3eccf1eb585097c0b6fd31c18c418-0' }" - - input: - tuple val(meta), path(cram), path(crai) - path gff - path fasta - path fasta_fai - - output: - tuple val(meta), path("${prefix}"), emit: results - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - prefix = task.ext.suffix ? 
"${meta.id}${task.ext.suffix}" : "${meta.id}" - def collect_pairs = meta.single_end ? '' : '--collect-overlap-pairs' - def memory = task.memory.toGiga() + "G" - def regions = gff ? "--gff $gff" : '' - def strandedness = 'non-strand-specific' - if (meta.strandedness == 'forward') { - strandedness = 'strand-specific-forward' - } else if (meta.strandedness == 'reverse') { - strandedness = 'strand-specific-reverse' - } - """ - unset DISPLAY - mkdir tmp - export _JAVA_OPTIONS=-Djava.io.tmpdir=./tmp - - samtools view -hb -T ${fasta} ${cram} | - qualimap \\ - --java-mem-size=$memory \\ - bamqc \\ - $args \\ - -bam /dev/stdin \\ - $regions \\ - -p $strandedness \\ - $collect_pairs \\ - -outdir $prefix \\ - -nt $task.cpus - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - qualimap: \$(echo \$(qualimap 2>&1) | sed 's/^.*QualiMap v.//; s/Built.*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/qualimap/bamqccram/meta.yml b/modules/local/qualimap/bamqccram/meta.yml deleted file mode 100644 index 3c608f31a2..0000000000 --- a/modules/local/qualimap/bamqccram/meta.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: qualimap_bamqc -description: Evaluate alignment data -keywords: - - quality control - - qc - - bam -tools: - - qualimap: - description: | - Qualimap 2 is a platform-independent application written in - Java and R that provides both a Graphical User Interface and - a command-line interface to facilitate the quality control of - alignment sequencing data and its derivatives like feature counts. - homepage: http://qualimap.bioinfo.cipf.es/ - documentation: http://qualimap.conesalab.org/doc_html/index.html - doi: 10.1093/bioinformatics/bts503 -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - bam: - type: file - description: BAM file - pattern: "*.{bam}" - - gff: - type: file - description: Feature file with regions of interest - pattern: "*.{gff,gtf,bed}" - - use_gff: - type: boolean - description: Specifies if feature file should be used or not -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - results: - type: dir - description: Qualimap results dir - pattern: "*/*" - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" -authors: - - "@phue" diff --git a/modules/local/samtools/fastq/main.nf b/modules/local/samtools/fastq/main.nf deleted file mode 100644 index f65734ac50..0000000000 --- a/modules/local/samtools/fastq/main.nf +++ /dev/null @@ -1,43 +0,0 @@ -process SAMTOOLS_FASTQ { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" - - input: - tuple val(meta), path(input) - - output: - //TODO might be good to have ordered output of the fastq files, so we can - // make sure the we get the right files - tuple val(meta), path("*.fq.gz"), emit: reads - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args ?: '' - - def prefix = task.ext.prefix ?: "${meta.id}" - - - """ - samtools collate -O -@$task.cpus $args $input . 
| - - samtools \\ - fastq \\ - $args2 \\ - -@ $task.cpus \\ - -1 ${prefix}_1.fq.gz \\ - -2 ${prefix}_2.fq.gz \\ - -0 ${prefix}_other.fq.gz \\ - -s ${prefix}_singleton.fq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/samtools/fastq/meta.yml b/modules/local/samtools/fastq/meta.yml deleted file mode 100644 index f35701c457..0000000000 --- a/modules/local/samtools/fastq/meta.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: samtools_bam2fq -description: | - The module uses bam2fq method from samtools to - convert a SAM, BAM or CRAM file to FASTQ format -keywords: - - bam2fq - - samtools - - fastq -tools: - - samtools: - description: Tools for dealing with SAM, BAM and CRAM files - homepage: None - documentation: http://www.htslib.org/doc/1.1/samtools.html - tool_dev_url: None - doi: "" - licence: ['MIT'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - inputbam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - split: - type: boolean - description: | - TRUE/FALSE value to indicate if reads should be separated into - /1, /2 and if present other, or singleton. - Note: choosing TRUE will generate 4 different files. - Choosing FALSE will produce a single file, which will be interleaved in case - the input contains paired reads. - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - reads: - type: file - description: | - FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton) - or a single interleaved .fq.gz file if the user chooses not to split the reads. 
- pattern: "*.fq.gz" - -authors: - - "@lescai" diff --git a/modules/local/samtools/index/main.nf b/modules/local/samtools/index/main.nf deleted file mode 100644 index b0378d17b4..0000000000 --- a/modules/local/samtools/index/main.nf +++ /dev/null @@ -1,29 +0,0 @@ -process SAMTOOLS_INDEX { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" - - input: - tuple val(meta), path(input) - - output: - tuple val(meta), path("*.bam", includeInputs:true), path("*.bai") , optional:true, emit: bam_bai - tuple val(meta), path("*.bam", includeInputs:true), path("*.csi") , optional:true, emit: bam_csi - tuple val(meta), path("*.cram", includeInputs:true), path("*.crai"), optional:true, emit: cram_crai - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - """ - samtools index -@ ${task.cpus-1} $args $input - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/samtools/index/meta.yml b/modules/local/samtools/index/meta.yml deleted file mode 100644 index 0905b3cd69..0000000000 --- a/modules/local/samtools/index/meta.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: samtools_index -description: Index SAM/BAM/CRAM file -keywords: - - index - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. 
- homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ['MIT'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bai: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - crai: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - csi: - type: file - description: CSI index file - pattern: "*.{csi}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@ewels" - - "@maxulysse" diff --git a/modules/local/samtools/mergecram/main.nf b/modules/local/samtools/mergecram/main.nf deleted file mode 100644 index a3a8497e28..0000000000 --- a/modules/local/samtools/mergecram/main.nf +++ /dev/null @@ -1,28 +0,0 @@ -process SAMTOOLS_MERGE_CRAM { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" - - input: - tuple val(meta), path(crams) - path fasta - - output: - tuple val(meta), path("*.cram"), emit: cram - path "versions.yml" , emit: versions - - script: - def prefix = task.ext.suffix ? 
"${meta.id}${task.ext.suffix}" : "${meta.id}" - """ - samtools merge -@${task.cpus} --reference ${fasta} ${prefix}.cram $crams - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/samtools/mergecram/meta.yml b/modules/local/samtools/mergecram/meta.yml deleted file mode 100644 index 9092f22e50..0000000000 --- a/modules/local/samtools/mergecram/meta.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: samtools_merge -description: Merge BAM file -keywords: - - merge - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM file - pattern: "*.{bam,cram,sam}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - merged_bam: - type: file - description: BAM file - pattern: "*.{bam}" - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" -authors: - - "@drpatelh" - - "@yuukiiwa " - - "@maxulysse" diff --git a/modules/local/samtools/reindex_bam/environment.yml b/modules/local/samtools/reindex_bam/environment.yml new file mode 100644 index 0000000000..da2df5e43a --- /dev/null +++ b/modules/local/samtools/reindex_bam/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/local/samtools/reindex_bam/main.nf b/modules/local/samtools/reindex_bam/main.nf new file mode 100644 index 0000000000..153f9093d6 --- /dev/null +++ b/modules/local/samtools/reindex_bam/main.nf @@ -0,0 +1,57 @@ +/** + * The aim of this process is to re-index the BAM file without the duplicate, supplementary, unmapped reads etc., for goleft/indexcov + * It creates a BAM containing only a header (so indexcov can get the sample name) and a BAM index where low quality reads, supplementary alignments etc. have been removed + */ +process SAMTOOLS_REINDEX_BAM { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(input_index) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path("${meta.id}.reindex.bam"), path("${meta.id}.reindex.bam.bai"),emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def reference = fasta ?
"--reference ${fasta}" : "" + """ + # write header only + samtools \\ + view \\ + --header-only \\ + --threads ${task.cpus} \\ + -O BAM \\ + -o "${meta.id}.reindex.bam" \\ + ${reference} \\ + ${input} + + # write BAM index only, remove unmapped, supplementary, etc... + samtools \\ + view \\ + --uncompressed \\ + --write-index \\ + --threads ${task.cpus} \\ + -O BAM \\ + -o "/dev/null##idx##${meta.id}.reindex.bam.bai" \\ + ${reference} \\ + ${args} \\ + ${input} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/samtools/viewindex/main.nf b/modules/local/samtools/viewindex/main.nf deleted file mode 100644 index cb6e087908..0000000000 --- a/modules/local/samtools/viewindex/main.nf +++ /dev/null @@ -1,34 +0,0 @@ -//There is a -L option to only output alignments in interval, might be an option for exons/panel data? -process SAMTOOLS_VIEWINDEX { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" - - input: - tuple val(meta), path(input), path(index) - path fasta - path fai_fai - - output: - tuple val(meta), path("*.bam"), path("*.bai") , optional: true, emit: bam_bai - tuple val(meta), path("*.cram"), path("*.crai"), optional: true, emit: cram_crai - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def reference_command = fasta ? 
"--reference ${fasta} -C" : "" - """ - samtools view --threads ${task.cpus-1} ${reference_command} $args $input > ${prefix}.cram - samtools index -@${task.cpus} ${prefix}.cram - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/samtools/viewindex/meta.yml b/modules/local/samtools/viewindex/meta.yml deleted file mode 100644 index c35a8b0312..0000000000 --- a/modules/local/samtools/viewindex/meta.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: samtools_view -description: filter/convert SAM/BAM/CRAM file -keywords: - - view - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: filtered/converted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" -authors: - - "@drpatelh" - - "@joseespinosa" diff --git a/modules/local/samtoolsview.nf b/modules/local/samtoolsview.nf deleted file mode 100644 index 04c0d12642..0000000000 --- a/modules/local/samtoolsview.nf +++ /dev/null @@ -1,42 +0,0 @@ -process SAMTOOLS_VIEW { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? 
"bioconda::samtools=1.14" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" - - input: - tuple val(meta), path(input) - path fasta - - output: - tuple val(meta), path("*.bam"), path("*.bai") , emit: bam - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def reference = fasta ? "--reference ${fasta}" : "" - //if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - """ - samtools \\ - view \\ - --threads ${task.cpus-1} \\ - ${reference} \\ - $args \\ - $input \\ - $args2 \\ - > ${prefix}.bam - - samtools index ${prefix}.bam - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/ascat/environment.yml b/modules/nf-core/ascat/environment.yml new file mode 100644 index 0000000000..d19645cb3f --- /dev/null +++ b/modules/nf-core/ascat/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::ascat=3.2.0 + - bioconda::cancerit-allelecount=4.3.0 diff --git a/modules/nf-core/ascat/main.nf b/modules/nf-core/ascat/main.nf new file mode 100644 index 0000000000..b18948f525 --- /dev/null +++ b/modules/nf-core/ascat/main.nf @@ -0,0 +1,257 @@ +process ASCAT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/4c/4cf02c7911ee5e974ce7db978810770efbd8d872ff5ab3462d2a11bcf022fab5/data': + 'community.wave.seqera.io/library/ascat_cancerit-allelecount:c3e8749fa4af0e99' }" + + input: + tuple val(meta), path(input_normal), path(index_normal), path(input_tumor), path(index_tumor) + path(allele_files) + path(loci_files) + path(bed_file) // optional + path(fasta) // optional + path(gc_file) // optional + path(rt_file) // optional + + output: + tuple val(meta), path("*alleleFrequencies_chr*.txt"), emit: allelefreqs + tuple val(meta), path("*BAF.txt") , emit: bafs + tuple val(meta), path("*cnvs.txt") , emit: cnvs + tuple val(meta), path("*LogR.txt") , emit: logrs + tuple val(meta), path("*metrics.txt") , emit: metrics + tuple val(meta), path("*png") , emit: png + tuple val(meta), path("*purityploidy.txt") , emit: purityploidy + tuple val(meta), path("*segments.txt") , emit: segments + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def gender = args.gender ? "${args.gender}" : "NULL" + def genomeVersion = args.genomeVersion ? "${args.genomeVersion}" : "NULL" + def purity = args.purity ? "${args.purity}" : "NULL" + def ploidy = args.ploidy ? "${args.ploidy}" : "NULL" + def gc_input = gc_file ? "${gc_file}" : "NULL" + def rt_input = rt_file ? "${rt_file}" : "NULL" + + def minCounts_arg = args.minCounts ? ", minCounts = ${args.minCounts}" : "" + def bed_file_arg = bed_file ? ", BED_file = '${bed_file}'" : "" + def chrom_names_arg = args.chrom_names ? ", chrom_names = ${args.chrom_names}" : "" + def min_base_qual_arg = args.min_base_qual ? ", min_base_qual = ${args.min_base_qual}" : "" + def min_map_qual_arg = args.min_map_qual ? ", min_map_qual = ${args.min_map_qual}" : "" + def skip_allele_counting_tumour_arg = args.skip_allele_counting_tumour ? 
", skip_allele_counting_tumour = ${args.skip_allele_counting_tumour}" : "" + def skip_allele_counting_normal_arg = args.skip_allele_counting_normal ? ", skip_allele_counting_normal = ${args.skip_allele_counting_normal}" : "" + + if(args.additional_allelecounter_flags && fasta) { + additional_allelecounter_arg = ", additional_allelecounter_flags = \"${args.additional_allelecounter_flags} -r ${fasta}\" " + } else if (args.additional_allelecounter_flags ) { + additional_allelecounter_arg = ", additional_allelecounter_flags = \"${args.additional_allelecounter_flags}\" " + } else if (fasta) { + additional_allelecounter_arg = ", additional_allelecounter_flags = '-r \"${fasta}\"'" + } else { + additional_allelecounter_arg = "" + } + + """ + #!/usr/bin/env Rscript + library(RColorBrewer) + library(ASCAT) + options(bitmapType='cairo') + + if(dir.exists("${allele_files}")) { + # expected production use of a directory + allele_path = normalizePath("${allele_files}") + allele_prefix = paste0(allele_path, "/", "${allele_files}", "_chr") + } else if(file.exists("${allele_files}")) { + # expected testing use of a single file + allele_path = basename(normalizePath("${allele_files}")) + allele_prefix = sub('_chr[0-9]+\\\\.txt\$', "_chr", allele_path) + } else { + stop("The specified allele files do not exist.") + } + + if(length(Sys.glob(paste0(allele_prefix,"*")) ) == 0) { + stop(paste("No allele files found matching", allele_prefix)) + } + + if(dir.exists("${loci_files}")) { + # expected production use of a directory + loci_path = normalizePath("${loci_files}") + loci_prefix = paste0(loci_path, "/", "${loci_files}", "_chr") + } else if(file.exists("${loci_files}")) { + # expected testing use of a single file + loci_path = basename(normalizePath("${loci_files}")) + loci_prefix = sub('_chr[0-9]+\\\\.txt\$', "_chr", loci_path) + } else { + stop("The specified loci files do not exist.") + } + + if(length(Sys.glob(paste0(loci_prefix,"*")) ) == 0) { + stop(paste("No loci files found 
matching", loci_prefix)) + } + + # Prepare from BAM files + ascat.prepareHTS( + tumourseqfile = "${input_tumor}", + normalseqfile = "${input_normal}", + tumourname = paste0("${prefix}", ".tumour"), + normalname = paste0("${prefix}", ".normal"), + allelecounter_exe = "alleleCounter", + alleles.prefix = allele_prefix, + loci.prefix = loci_prefix, + gender = "${gender}", + genomeVersion = "${genomeVersion}", + nthreads = ${task.cpus} + ${minCounts_arg} + ${bed_file_arg} + ${chrom_names_arg} + ${min_base_qual_arg} + ${min_map_qual_arg} + ${skip_allele_counting_tumour_arg} + ${skip_allele_counting_normal_arg} + ${additional_allelecounter_arg} + , seed = 42 + ) + + # Load the data + ascat.bc = ascat.loadData( + Tumor_LogR_file = paste0("${prefix}", ".tumour_tumourLogR.txt"), + Tumor_BAF_file = paste0("${prefix}", ".tumour_tumourBAF.txt"), + Germline_LogR_file = paste0("${prefix}", ".tumour_normalLogR.txt"), + Germline_BAF_file = paste0("${prefix}", ".tumour_normalBAF.txt"), + genomeVersion = "${genomeVersion}", + gender = "${gender}" + ) + + # Plot the raw data + ascat.plotRawData(ascat.bc, img.prefix = paste0("${prefix}", ".before_correction.")) + + # Optional LogRCorrection + if("${gc_input}" != "NULL") { + + if(dir.exists("${gc_input}")) { + # sarek production use of an unzipped folder containing one file + gc_input = list.files("${gc_input}", recursive = TRUE, full.names = TRUE) + if(length(gc_input) != 1 | !file.exists(gc_input)) { + stop("A single gc_input should be provided!") + } + } else if(file.exists("${gc_input}")) { + gc_input = normalizePath("${gc_input}") + } else { + stop("gc_input must be a file or folder containing one file") + } + + if("${rt_input}" != "NULL"){ + + if(dir.exists("${rt_input}")) { + # sarek production use of an unzipped folder containing one file + rt_input = list.files("${rt_input}", recursive = TRUE, full.names = TRUE) + if(length(rt_input) != 1 | !file.exists(rt_input)) { + stop("A single rt_input should be provided!") + } + } else 
if(file.exists("${rt_input}")) { + rt_input = normalizePath("${rt_input}") + } else { + stop("rt_input must be a file or folder containing one file") + } + + ascat.bc = ascat.correctLogR(ascat.bc, GCcontentfile = gc_input, replictimingfile = rt_input) + # Plot raw data after correction + ascat.plotRawData(ascat.bc, img.prefix = paste0("${prefix}", ".after_correction_gc_rt.")) + } + else { + ascat.bc = ascat.correctLogR(ascat.bc, GCcontentfile = gc_input, replictimingfile = ${rt_input}) + # Plot raw data after correction + ascat.plotRawData(ascat.bc, img.prefix = paste0("${prefix}", ".after_correction_gc.")) + } + } + + # Segment the data + ascat.bc = ascat.aspcf(ascat.bc, seed=42) + + # Plot the segmented data + ascat.plotSegmentedData(ascat.bc) + + # Run ASCAT to fit every tumor to a model, inferring ploidy, normal cell contamination, + # and discrete copy numbers + # If psi and rho are manually set: + if (!is.null(${purity}) && !is.null(${ploidy})){ + ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=${purity}, psi_manual=${ploidy}) + } else if(!is.null(${purity}) && is.null(${ploidy})){ + ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=${purity}) + } else if(!is.null(${ploidy}) && is.null(${purity})){ + ascat.output <- ascat.runAscat(ascat.bc, gamma=1, psi_manual=${ploidy}) + } else { + ascat.output <- ascat.runAscat(ascat.bc, gamma=1) + } + + # Extract metrics from ASCAT profiles + QC = ascat.metrics(ascat.bc,ascat.output) + + # Write out segmented regions (including regions with one copy of each allele) + write.table(ascat.output[["segments"]], file=paste0("${prefix}", ".segments.txt"), sep="\t", quote=F, row.names=F) + + # Write out CNVs in bed format + cnvs=ascat.output[["segments"]][2:6] + write.table(cnvs, file=paste0("${prefix}",".cnvs.txt"), sep="\t", quote=F, row.names=F, col.names=T) + + # Write out purity and ploidy info + summary <- tryCatch({ + matrix(c(ascat.output[["aberrantcellfraction"]], ascat.output[["ploidy"]]), 
ncol=2, byrow=TRUE)}, error = function(err) { + # error handler picks up where error was generated + print(paste("Could not find optimal solution: ",err)) + return(matrix(c(0,0),nrow=1,ncol=2,byrow = TRUE)) + } + ) + colnames(summary) <- c("AberrantCellFraction","Ploidy") + write.table(summary, file=paste0("${prefix}",".purityploidy.txt"), sep="\t", quote=F, row.names=F, col.names=T) + + write.table(QC, file=paste0("${prefix}", ".metrics.txt"), sep="\t", quote=F, row.names=F) + + # Version export + f <- file("versions.yml","w") + alleleCounter_version = system(paste("alleleCounter --version"), intern = T) + ascat_version = as.character(packageVersion('ASCAT')) + writeLines(paste0('"', "${task.process}", '"', ":"), f) + writeLines(paste(" ascat:", ascat_version), f) + writeLines(paste(" alleleCounter:", alleleCounter_version), f) + close(f) + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.after_correction.gc_rt.test.tumour.germline.png + touch ${prefix}.after_correction.gc_rt.test.tumour.tumour.png + touch ${prefix}.before_correction.test.tumour.germline.png + touch ${prefix}.before_correction.test.tumour.tumour.png + touch ${prefix}.cnvs.txt + touch ${prefix}.metrics.txt + touch ${prefix}.normal_alleleFrequencies_chr21.txt + touch ${prefix}.normal_alleleFrequencies_chr22.txt + touch ${prefix}.purityploidy.txt + touch ${prefix}.segments.txt + touch ${prefix}.tumour.ASPCF.png + touch ${prefix}.tumour.sunrise.png + touch ${prefix}.tumour_alleleFrequencies_chr21.txt + touch ${prefix}.tumour_alleleFrequencies_chr22.txt + touch ${prefix}.tumour_normalBAF.txt + touch ${prefix}.tumour_normalLogR.txt + touch ${prefix}.tumour_tumourBAF.txt + touch ${prefix}.tumour_tumourLogR.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bioconductor-ascat: \$(Rscript -e "library(ASCAT); cat(as.character(packageVersion('ASCAT')))") + alleleCounter: \$(alleleCounter --version) + END_VERSIONS + """ + + +} diff --git 
a/modules/nf-core/ascat/meta.yml b/modules/nf-core/ascat/meta.yml new file mode 100644 index 0000000000..db7c92926a --- /dev/null +++ b/modules/nf-core/ascat/meta.yml @@ -0,0 +1,165 @@ +name: ascat +description: copy number profiles of tumour cells. +keywords: + - bam + - copy number + - cram +tools: + - ascat: + description: ASCAT is a method to derive copy number profiles of tumour cells, + accounting for normal cell admixture and tumour aneuploidy. ASCAT infers tumour + purity (the fraction of tumour cells) and ploidy (the amount of DNA per tumour + cell), expressed as multiples of haploid genomes from SNP array or massively + parallel sequencing data, and calculates whole-genome allele-specific copy number + profiles (the number of copies of both parental alleles for all SNP loci across + the genome). + documentation: https://github.com/VanLoo-lab/ascat/tree/master/man + tool_dev_url: https://github.com/VanLoo-lab/ascat + doi: "10.1093/bioinformatics/btaa538" + licence: ["GPL v3"] + identifier: biotools:ascat +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_normal: + type: file + description: BAM/CRAM file, must adhere to chr1, chr2, ...chrX notation For + modifying chromosome notation in bam files please follow + https://josephcckuo.wordpress.com/2016/11/17/modify-chromosome-notation-in-bam-file/. + pattern: "*.{bam,cram}" + - index_normal: + type: file + description: index for normal_bam/cram + pattern: "*.{bai,crai}" + - input_tumor: + type: file + description: BAM/CRAM file, must adhere to chr1, chr2, ...chrX notation + pattern: "*.{bam,cram}" + - index_tumor: + type: file + description: index for tumor_bam/cram + pattern: "*.{bai,crai}" + - - allele_files: + type: file + description: allele files for ASCAT WGS. 
Can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS + - - loci_files: + type: file + description: loci files for ASCAT WGS. Loci files without chromosome notation + can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS + Make sure the chromosome notation matches the bam/cram input files. To add + the chromosome notation to loci files (hg19/hg38) if necessary, you can run + this command `if [[ $(samtools view | head -n1 | cut -f3)\" + == *\"chr\"* ]]; then for i in {1..22} X; do sed -i 's/^/chr/' G1000_loci_hg19_chr_${i}.txt; + done; fi` + - - bed_file: + type: file + description: Bed file for ASCAT WES (optional, but recommended for WES) + - - fasta: + type: file + description: Reference fasta file (optional) + - - gc_file: + type: file + description: GC correction file (optional) - Used to do logR correction of the + tumour sample(s) with genomic GC content + - - rt_file: + type: file + description: replication timing correction file (optional, provide only in combination + with gc_file) +output: + - allelefreqs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*alleleFrequencies_chr*.txt": + type: file + description: Files containing allele frequencies per chromosome + pattern: "*{alleleFrequencies_chr*.txt}" + - bafs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*BAF.txt": + type: file + description: BAF file + - cnvs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*cnvs.txt": + type: file + description: CNV file + - logrs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*LogR.txt": + type: file + description: LogR file + - metrics: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*metrics.txt": + type: file + description: File containing quality metrics + pattern: "*.{metrics.txt}" + - png: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*png": + type: file + description: ASCAT plots + pattern: "*.{png}" + - purityploidy: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*purityploidy.txt": + type: file + description: File with purity and ploidy data + pattern: "*.{purityploidy.txt}" + - segments: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*segments.txt": + type: file + description: File with segments data + pattern: "*.{segments.txt}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@aasNGC" + - "@lassefolkersen" + - "@FriederikeHanssen" + - "@maxulysse" + - "@SusiJo" +maintainers: + - "@aasNGC" + - "@lassefolkersen" + - "@FriederikeHanssen" + - "@maxulysse" + - "@SusiJo" diff --git a/modules/nf-core/bbmap/bbsplit/environment.yml b/modules/nf-core/bbmap/bbsplit/environment.yml new file mode 100644 index 0000000000..a33ddca858 --- /dev/null +++ b/modules/nf-core/bbmap/bbsplit/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bbmap=39.18 + - pigz=2.8 diff --git a/modules/nf-core/bbmap/bbsplit/main.nf b/modules/nf-core/bbmap/bbsplit/main.nf new file mode 100644 index 0000000000..bce2208321 --- /dev/null +++ 
b/modules/nf-core/bbmap/bbsplit/main.nf @@ -0,0 +1,130 @@ +process BBMAP_BBSPLIT { + tag "$meta.id" + label 'process_high' + label 'error_retry' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/5a/5aae5977ff9de3e01ff962dc495bfa23f4304c676446b5fdf2de5c7edfa2dc4e/data' : + 'community.wave.seqera.io/library/bbmap_pigz:07416fe99b090fa9' }" + + input: + tuple val(meta), path(reads) + path index, name: 'input_index' + path primary_ref + tuple val(other_ref_names), path(other_ref_paths) + val only_build_index + + output: + path "bbsplit_index" , optional:true, emit: index + tuple val(meta), path('*primary*fastq.gz'), optional:true, emit: primary_fastq + tuple val(meta), path('*fastq.gz') , optional:true, emit: all_fastq + tuple val(meta), path('*txt') , optional:true, emit: stats + tuple val(meta), path('*.log') , optional:true, emit: log + tuple val("${task.process}"), val('bbmap'), eval('bbversion.sh | grep -v "Duplicate cpuset"'), topic: versions, emit: versions_bbmap + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[BBSplit] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + def other_refs = [] + other_ref_names.eachWithIndex { name, idx -> + other_refs << "ref_${name}=${other_ref_paths[idx]}" + } + + def fastq_in='' + def fastq_out='' + def index_files='' + def refstats_cmd='' + def use_index = index ? 
true : false + + if (only_build_index) { + if (primary_ref && other_ref_names && other_ref_paths) { + index_files = 'ref_primary=' +primary_ref + ' ' + other_refs.join(' ') + ' path=bbsplit_build' + } else { + log.error 'ERROR: Please specify as input a primary fasta file along with names and paths to non-primary fasta files.' + } + } else { + if (index) { + index_files = "path=index_writable" + } else if (primary_ref && other_ref_names && other_ref_paths) { + index_files = "ref_primary=${primary_ref} ${other_refs.join(' ')}" + } else { + log.error 'ERROR: Please either specify a BBSplit index as input or a primary fasta file along with names and paths to non-primary fasta files.' + } + fastq_in = meta.single_end ? "in=${reads}" : "in=${reads[0]} in2=${reads[1]}" + fastq_out = meta.single_end ? "basename=${prefix}_%.fastq.gz" : "basename=${prefix}_%_#.fastq.gz" + refstats_cmd = 'refstats=' + prefix + '.stats.txt' + } + """ + + # If using a pre-built index, create writable structure: symlink all files except + # summary.txt (which we copy to modify). When we stage in the index files the time + # stamps get disturbed, which bbsplit doesn't like. Fix the time stamps in summaries. 
+ if [ "$use_index" == "true" ]; then + find input_index/ref -type f | while read -r f; do + target="index_writable/\${f#input_index/}" + mkdir -p "\$(dirname "\$target")" + [[ \$(basename "\$f") == "summary.txt" ]] && cp "\$f" "\$target" || ln -s "\$(realpath "\$f")" "\$target" + done + find index_writable/ref/genome -name summary.txt | while read -r summary_file; do + src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/ref/|index_writable/ref/|') + mod=\$(echo "System.out.println(java.nio.file.Files.getLastModifiedTime(java.nio.file.Paths.get(\\"\$src\\")).toMillis());" | jshell -J-Djdk.lang.Process.launchMechanism=vfork - 2>/dev/null | grep -oE '^[0-9]{12,14}\$') + sed -e 's|bbsplit_index/ref|index_writable/ref|' -e "s|^last modified.*|last modified\\t\$mod|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} + done + fi + + # Run BBSplit + + bbsplit.sh \\ + -Xmx${avail_mem}M \\ + $index_files \\ + threads=$task.cpus \\ + $fastq_in \\ + $fastq_out \\ + $refstats_cmd \\ + $args 2>| >(tee ${prefix}.log >&2) + + # Summary files will have an absolute path that will make the index + # impossible to use in other processes - fix paths and rename atomically + if [ -d bbsplit_build/ref/genome ]; then + find bbsplit_build/ref/genome -name summary.txt | while read -r summary_file; do + sed "s|^source.*|source\\t\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit_build|bbsplit_index|')|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} + done + mv bbsplit_build bbsplit_index + fi + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def other_refs = '' + other_ref_names.eachWithIndex { name, _idx -> + other_refs += "echo '' | gzip > ${prefix}_${name}.fastq.gz" + } + def will_build_index = only_build_index || (!index && primary_ref && other_ref_names && other_ref_paths) + """ + # Create index directory if building an index (either only_build_index or 
on-the-fly) + if [ "${will_build_index}" == "true" ]; then + mkdir -p bbsplit_index + fi + + # Only create output files if splitting (not just building index) + if ! (${only_build_index}); then + echo '' | gzip > ${prefix}_primary.fastq.gz + ${other_refs} + touch ${prefix}.stats.txt + fi + + touch ${prefix}.log + """ +} diff --git a/modules/nf-core/bbmap/bbsplit/meta.yml b/modules/nf-core/bbmap/bbsplit/meta.yml new file mode 100644 index 0000000000..ecab992d0d --- /dev/null +++ b/modules/nf-core/bbmap/bbsplit/meta.yml @@ -0,0 +1,129 @@ +name: bbmap_bbsplit +description: Split sequencing reads by mapping them to multiple references simultaneously +keywords: + - align + - map + - fastq + - genome + - reference +tools: + - bbmap: + description: BBMap is a short read aligner, as well as various other bioinformatic + tools. + homepage: https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/ + documentation: https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/ + licence: ["UC-LBL license (see package)"] + identifier: biotools:bbmap +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ ontologies: [] + - index: + type: directory + description: Directory to place generated index + pattern: "*" + - primary_ref: + type: file + description: Path to the primary reference + pattern: "*" + ontologies: [] + - - other_ref_names: + type: list + description: List of other reference ids apart from the primary + - other_ref_paths: + type: list + description: Path to other references paths corresponding to "other_ref_names" + - only_build_index: + type: string + description: true = only build index; false = mapping +output: + index: + - bbsplit_index: + type: directory + description: Directory with index files + pattern: "bbsplit_index" + primary_fastq: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*primary*fastq.gz": + type: file + description: Output reads that map to the primary reference + pattern: "*primary*fastq.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + all_fastq: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*fastq.gz": + type: file + description: All reads mapping to any of the references + pattern: "*fastq.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + stats: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*txt": + type: file + description: Tab-delimited text file containing mapping statistics + pattern: "*.txt" + ontologies: [] + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.log": + type: file + description: Log file + pattern: "*.log" + ontologies: [] + versions_bbmap: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bbmap: + type: string + description: The tool name + - bbversion.sh | grep -v "Duplicate cpuset": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bbmap: + type: string + description: The tool name + - bbversion.sh | grep -v "Duplicate cpuset": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@joseespinosa" + - "@drpatelh" + - "@pinin4fjords" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@pinin4fjords" diff --git a/modules/nf-core/bcftools/annotate/environment.yml b/modules/nf-core/bcftools/annotate/environment.yml new file mode 100644 index 0000000000..557488607c --- /dev/null +++ b/modules/nf-core/bcftools/annotate/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.21
diff --git a/modules/nf-core/bcftools/annotate/main.nf b/modules/nf-core/bcftools/annotate/main.nf
new file mode 100644
index 0000000000..095643369c
--- /dev/null
+++ b/modules/nf-core/bcftools/annotate/main.nf
@@ -0,0 +1,111 @@
+// Add or remove VCF/BCF annotations with `bcftools annotate`.
+// The output suffix is derived from the -O/--output-type flag found in task.ext.args.
+process BCFTOOLS_ANNOTATE {
+    tag "${meta.id}"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/5a/5acacb55c52bec97c61fd34ffa8721fce82ce823005793592e2a80bf71632cd0/data'
+        : 'community.wave.seqera.io/library/bcftools:1.21--4335bec1d7b44d11'}"
+
+    input:
+    tuple val(meta), path(input), path(index), path(annotations), path(annotations_index)
+    path columns
+    path header_lines
+    path rename_chrs
+
+    output:
+    tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf
+    tuple val(meta), path("*.tbi"), emit: tbi, optional: true
+    tuple val(meta), path("*.csi"), emit: csi, optional: true
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def bcftools_args = task.ext.args ?: ''
+    def out_prefix = task.ext.prefix ?: "${meta.id}"
+
+    // Optional inputs only contribute their flag when a file was provided
+    def annotations_opt = annotations ? "--annotations ${annotations}" : ''
+    def columns_opt = columns ? "--columns-file ${columns}" : ''
+    def header_opt = header_lines ? "--header-lines ${header_lines}" : ''
+    def rename_opt = rename_chrs ? "--rename-chrs ${rename_chrs}" : ''
+
+    // Output suffix follows the requested output type; uncompressed VCF otherwise
+    def out_ext = 'vcf'
+    if (bcftools_args.contains("--output-type b") || bcftools_args.contains("-Ob")) {
+        out_ext = 'bcf.gz'
+    } else if (bcftools_args.contains("--output-type u") || bcftools_args.contains("-Ou")) {
+        out_ext = 'bcf'
+    } else if (bcftools_args.contains("--output-type z") || bcftools_args.contains("-Oz")) {
+        out_ext = 'vcf.gz'
+    }
+
+    // Index the query file first when no index was supplied
+    def index_command = index ? '' : "bcftools index ${input}"
+
+    if ("${input}" == "${out_prefix}.${out_ext}") {
+        error("Input and output names are the same, set prefix in module configuration to disambiguate!")
+    }
+    """
+    ${index_command}
+
+    bcftools \\
+        annotate \\
+        ${bcftools_args} \\
+        ${annotations_opt} \\
+        ${columns_opt} \\
+        ${header_opt} \\
+        ${rename_opt} \\
+        --output ${out_prefix}.${out_ext} \\
+        --threads ${task.cpus} \\
+        ${input}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' )
+    END_VERSIONS
+    """
+
+    stub:
+    def bcftools_args = task.ext.args ?: ''
+    def out_prefix = task.ext.prefix ?: "${meta.id}"
+
+    def out_ext = 'vcf'
+    if (bcftools_args.contains("--output-type b") || bcftools_args.contains("-Ob")) {
+        out_ext = 'bcf.gz'
+    } else if (bcftools_args.contains("--output-type u") || bcftools_args.contains("-Ou")) {
+        out_ext = 'bcf'
+    } else if (bcftools_args.contains("--output-type z") || bcftools_args.contains("-Oz")) {
+        out_ext = 'vcf.gz'
+    }
+
+    // --write-index / -W without an explicit tbi request is treated as CSI
+    def index_ext = ''
+    if (bcftools_args.contains("--write-index=tbi") || bcftools_args.contains("-W=tbi")) {
+        index_ext = 'tbi'
+    } else if (bcftools_args.contains("--write-index") || bcftools_args.contains("-W")) {
+        index_ext = 'csi'
+    }
+
+    // Index placeholders are only created next to compressed outputs
+    def compressed = out_ext.endsWith(".gz")
+    def create_cmd = compressed ? "echo '' | gzip >" : "touch"
+    def create_index = compressed && index_ext ? "touch ${out_prefix}.${out_ext}.${index_ext}" : ""
+
+    if ("${input}" == "${out_prefix}.${out_ext}") {
+        error("Input and output names are the same, set prefix in module configuration to disambiguate!")
+    }
+    """
+    ${create_cmd} ${out_prefix}.${out_ext}
+    ${create_index}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/bcftools/annotate/meta.yml b/modules/nf-core/bcftools/annotate/meta.yml new file mode 100644 index 0000000000..058954d21d --- /dev/null +++ b/modules/nf-core/bcftools/annotate/meta.yml @@ -0,0 +1,98 @@ +name: bcftools_annotate +description: Add or remove annotations. +keywords: + - bcftools + - annotate + - vcf + - remove + - add +tools: + - annotate: + description: Add or remove annotations. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: https://samtools.github.io/bcftools/bcftools.html#annotate + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - input: + type: file + description: Query VCF or BCF file, can be either uncompressed or compressed + ontologies: [] + - index: + type: file + description: Index of the query VCF or BCF file + ontologies: [] + - annotations: + type: file + description: Bgzip-compressed file with annotations + ontologies: [] + - annotations_index: + type: file + description: Index of the annotations file + ontologies: [] + - columns: + type: file + description: List of columns in the annotations file, one name per row + ontologies: [] + - header_lines: + type: file + description: Contains lines to append to the output VCF header + ontologies: [] + - rename_chrs: + type: file + description: Rename annotations according to this file containing "old_name new_name\n" + pairs separated by whitespaces, each on a separate line. + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: Compressed annotated VCF file + pattern: "*{vcf,vcf.gz,bcf,bcf.gz}" + ontologies: [] + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@projectoriented" + - "@ramprasadn" +maintainers: + - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/concat/environment.yml b/modules/nf-core/bcftools/concat/environment.yml new file mode 100644 index 0000000000..557488607c --- /dev/null +++ b/modules/nf-core/bcftools/concat/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.21
diff --git a/modules/nf-core/bcftools/concat/main.nf b/modules/nf-core/bcftools/concat/main.nf
new file mode 100644
index 0000000000..7f6874e553
--- /dev/null
+++ b/modules/nf-core/bcftools/concat/main.nf
@@ -0,0 +1,98 @@
+// Concatenate VCF/BCF files with `bcftools concat`, running tabix on any input without a staged index.
+process BCFTOOLS_CONCAT {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/5a/5acacb55c52bec97c61fd34ffa8721fce82ce823005793592e2a80bf71632cd0/data':
+        'community.wave.seqera.io/library/bcftools:1.21--4335bec1d7b44d11' }"
+
+    input:
+    tuple val(meta), path(vcfs), path(tbi)
+
+    output:
+    tuple val(meta), path("${prefix}.${extension}")    , emit: vcf
+    tuple val(meta), path("${prefix}.${extension}.tbi"), emit: tbi, optional: true
+    tuple val(meta), path("${prefix}.${extension}.csi"), emit: csi, optional: true
+    path "versions.yml"                                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def concat_args = task.ext.args ?: ''
+    // prefix and extension are left undeclared (no `def`) because the output block reads them
+    prefix = task.ext.prefix ?: "${meta.id}"
+
+    // Emit a tabix command for every VCF with no matching .tbi/.csi among the staged indices
+    def staged_index_names = tbi.findAll { idx -> !(idx instanceof List) }.collect { idx -> idx.name }
+    def index_cmds = vcfs.collect { vcf ->
+        def has_index = staged_index_names.contains(vcf.name + ".tbi") || staged_index_names.contains(vcf.name + ".csi")
+        return has_index ? "" : "tabix ${vcf}"
+    }
+    def create_input_index = index_cmds.join("\n ")
+
+    // Output suffix follows the requested output type; uncompressed VCF otherwise
+    extension = 'vcf'
+    if (concat_args.contains("--output-type b") || concat_args.contains("-Ob")) {
+        extension = 'bcf.gz'
+    } else if (concat_args.contains("--output-type u") || concat_args.contains("-Ou")) {
+        extension = 'bcf'
+    } else if (concat_args.contains("--output-type z") || concat_args.contains("-Oz")) {
+        extension = 'vcf.gz'
+    }
+
+    // Inputs are passed to bcftools sorted by their string representation
+    def sorted_vcfs = vcfs.sort { vcf -> vcf.toString() }
+    def input_files = sorted_vcfs.join(" ")
+    """
+    ${create_input_index}
+
+    bcftools concat \\
+        --output ${prefix}.${extension} \\
+        ${concat_args} \\
+        --threads ${task.cpus} \\
+        ${input_files}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def concat_args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+
+    extension = 'vcf'
+    if (concat_args.contains("--output-type b") || concat_args.contains("-Ob")) {
+        extension = 'bcf.gz'
+    } else if (concat_args.contains("--output-type u") || concat_args.contains("-Ou")) {
+        extension = 'bcf'
+    } else if (concat_args.contains("--output-type z") || concat_args.contains("-Oz")) {
+        extension = 'vcf.gz'
+    }
+
+    // --write-index / -W without an explicit tbi request is treated as CSI
+    def index_ext = ''
+    if (concat_args.contains("--write-index=tbi") || concat_args.contains("-W=tbi")) {
+        index_ext = 'tbi'
+    } else if (concat_args.contains("--write-index") || concat_args.contains("-W")) {
+        index_ext = 'csi'
+    }
+
+    def compressed = extension.endsWith(".gz")
+    def create_cmd = compressed ? "echo '' | gzip >" : "touch"
+    def create_index = compressed && index_ext ? "touch ${prefix}.${extension}.${index_ext}" : ""
+
+    """
+    ${create_cmd} ${prefix}.${extension}
+    ${create_index}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/bcftools/concat/meta.yml b/modules/nf-core/bcftools/concat/meta.yml new file mode 100644 index 0000000000..3d12673a29 --- /dev/null +++ b/modules/nf-core/bcftools/concat/meta.yml @@ -0,0 +1,80 @@ +name: bcftools_concat +description: Concatenate VCF files +keywords: + - variant calling + - concat + - bcftools + - VCF +tools: + - concat: + description: | + Concatenate VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcfs: + type: list + description: | + List containing 2 or more vcf files + e.g. [ 'file1.vcf', 'file2.vcf' ] + - tbi: + type: list + description: | + List containing 2 or more index files (optional) + e.g. [ 'file1.tbi', 'file2.tbi' ] +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${extension}: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${extension}.tbi: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.tbi" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${extension}.csi: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@nvnieuwk" +maintainers: + - "@abhi18av" + - "@nvnieuwk" diff --git a/modules/nf-core/bcftools/isec/environment.yml b/modules/nf-core/bcftools/isec/environment.yml new file mode 100644 index 0000000000..ba863b388f --- /dev/null +++ b/modules/nf-core/bcftools/isec/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22
diff --git a/modules/nf-core/bcftools/isec/main.nf b/modules/nf-core/bcftools/isec/main.nf
new file mode 100644
index 0000000000..c9bdf4ce34
--- /dev/null
+++ b/modules/nf-core/bcftools/isec/main.nf
@@ -0,0 +1,53 @@
+// Run `bcftools isec` on the staged VCF/BCF files; results are written to a directory named after prefix.
+process BCFTOOLS_ISEC {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data':
+        'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f' }"
+
+    input:
+    tuple val(meta), path(vcfs), path(tbis)
+
+    output:
+    tuple val(meta), path("${prefix}", type: "dir"), emit: results
+    path "versions.yml"                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // prefix is left undeclared (no `def`) because the output block reads it
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    bcftools isec \\
+        $args \\
+        -p $prefix \\
+        ${vcfs}
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    // task.ext.args is not read here: the stub commands below never use it
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    mkdir ${prefix}
+    touch ${prefix}/README.txt
+    touch ${prefix}/sites.txt
+    echo "" | gzip > ${prefix}/0000.vcf.gz
+    touch ${prefix}/0000.vcf.gz.tbi
+    echo "" | gzip > ${prefix}/0001.vcf.gz
+    touch ${prefix}/0001.vcf.gz.tbi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/bcftools/isec/meta.yml b/modules/nf-core/bcftools/isec/meta.yml new file mode 100644 index 0000000000..55cb49f4c7 --- /dev/null +++ b/modules/nf-core/bcftools/isec/meta.yml @@ -0,0 +1,58 @@ +name: bcftools_isec +description: Apply set operations to VCF files +keywords: + - variant calling + - intersect + - union + - complement + - VCF + - BCF +tools: + - isec: + description: | + Computes intersections, unions and complements of VCF files.
+ homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcfs: + type: list + description: | + List containing 2 or more vcf/bcf files. These must be compressed and have an associated index. + e.g. [ 'file1.vcf.gz', 'file2.vcf' ] + - tbis: + type: list + description: | + List containing the tbi index files corresponding to the vcf/bcf input files + e.g. [ 'file1.vcf.tbi', 'file2.vcf.tbi' ] +output: + results: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}: + type: directory + description: Folder containing the results of the set operations performed on the vcf files + pattern: "${prefix}" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/merge/environment.yml b/modules/nf-core/bcftools/merge/environment.yml new file mode 100644 index 0000000000..ba863b388f --- /dev/null +++ b/modules/nf-core/bcftools/merge/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 diff --git a/modules/nf-core/bcftools/merge/main.nf b/modules/nf-core/bcftools/merge/main.nf new file mode 100644 index 0000000000..c560a90212 --- /dev/null +++
b/modules/nf-core/bcftools/merge/main.nf
@@ -0,0 +1,94 @@
+// Merge VCF/BCF files with `bcftools merge`, optionally restricted to the regions of a BED file.
+process BCFTOOLS_MERGE {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data':
+        'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f' }"
+
+    input:
+    tuple val(meta), path(vcfs), path(tbis)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fai)
+    tuple val(meta4), path(bed)
+
+    output:
+    tuple val(meta), path("*.{bcf,vcf}{,.gz}"), emit: vcf
+    tuple val(meta), path("*.{csi,tbi}")      , emit: index, optional: true
+    path "versions.yml"                       , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def merge_args = task.ext.args ?: ''
+    def out_prefix = task.ext.prefix ?: "${meta.id}"
+
+    // With more than one input, order the files by name before merging
+    def merge_inputs = vcfs
+    if (vcfs.collect().size() > 1) {
+        merge_inputs = vcfs.sort { vcf -> vcf.name }
+    }
+    def regions_opt = bed ? "--regions-file ${bed}" : ""
+
+    // Output suffix follows the requested output type; uncompressed VCF otherwise
+    def out_ext = 'vcf'
+    if (merge_args.contains("--output-type b") || merge_args.contains("-Ob")) {
+        out_ext = 'bcf.gz'
+    } else if (merge_args.contains("--output-type u") || merge_args.contains("-Ou")) {
+        out_ext = 'bcf'
+    } else if (merge_args.contains("--output-type z") || merge_args.contains("-Oz")) {
+        out_ext = 'vcf.gz'
+    }
+
+    """
+    bcftools merge \\
+        ${merge_args} \\
+        ${regions_opt} \\
+        --threads ${task.cpus} \\
+        --output ${out_prefix}.${out_ext} \\
+        ${merge_inputs}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def merge_args = task.ext.args ?: ''
+    def out_prefix = task.ext.prefix ?: "${meta.id}"
+
+    def out_ext = 'vcf'
+    if (merge_args.contains("--output-type b") || merge_args.contains("-Ob")) {
+        out_ext = 'bcf.gz'
+    } else if (merge_args.contains("--output-type u") || merge_args.contains("-Ou")) {
+        out_ext = 'bcf'
+    } else if (merge_args.contains("--output-type z") || merge_args.contains("-Oz")) {
+        out_ext = 'vcf.gz'
+    }
+
+    // --write-index / -W without an explicit tbi request is treated as CSI
+    def index_ext = ''
+    if (merge_args.contains("--write-index=tbi") || merge_args.contains("-W=tbi")) {
+        index_ext = 'tbi'
+    } else if (merge_args.contains("--write-index") || merge_args.contains("-W")) {
+        index_ext = 'csi'
+    }
+
+    def compressed = out_ext.endsWith(".gz")
+    def create_cmd = compressed ? "echo '' | gzip >" : "touch"
+    def create_index = compressed && index_ext ? "touch ${out_prefix}.${out_ext}.${index_ext}" : ""
+
+    """
+    ${create_cmd} ${out_prefix}.${out_ext}
+    ${create_index}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/bcftools/merge/meta.yml b/modules/nf-core/bcftools/merge/meta.yml new file mode 100644 index 0000000000..09af245a2c --- /dev/null +++ b/modules/nf-core/bcftools/merge/meta.yml @@ -0,0 +1,105 @@ +name: bcftools_merge +description: Merge VCF files +keywords: + - variant calling + - merge + - VCF +tools: + - merge: + description: | + Merge VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcfs: + type: file + description: | + List containing 2 or more vcf files + e.g. [ 'file1.vcf', 'file2.vcf' ] + ontologies: [] + - tbis: + type: file + description: | + List containing the tbi index files corresponding to the vcfs input files + e.g. [ 'file1.vcf.tbi', 'file2.vcf.tbi' ] + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g.
[ id:'genome' ] + - fasta: + type: file + description: "(Optional) The fasta reference file (only necessary for the `--gvcf + FILE` parameter)" + pattern: "*.{fasta,fa}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: "(Optional) The fasta reference file index (only necessary for + the `--gvcf FILE` parameter)" + pattern: "*.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing bed information + e.g. [ id:'genome' ] + - bed: + type: file + description: "(Optional) The bed regions to merge on" + pattern: "*.bed" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{bcf,vcf}{,.gz}": + type: file + description: merged output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + ontologies: [] + index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.{csi,tbi}": + type: file + description: index of merged output + pattern: "*.{csi,tbi}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@joseespinosa" + - "@drpatelh" + - "@nvnieuwk" + - "@ramprasadn" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@nvnieuwk" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/mpileup/environment.yml b/modules/nf-core/bcftools/mpileup/environment.yml new file mode 100644 index 0000000000..557488607c --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.21
diff --git a/modules/nf-core/bcftools/mpileup/main.nf b/modules/nf-core/bcftools/mpileup/main.nf
new file mode 100644
index 0000000000..f712b5183e
--- /dev/null
+++ b/modules/nf-core/bcftools/mpileup/main.nf
@@ -0,0 +1,80 @@
+// Pipe `bcftools mpileup` into call, reheader and view to produce a bgzipped VCF,
+// then index it with tabix and summarise it with `bcftools stats`.
+process BCFTOOLS_MPILEUP {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/5a/5acacb55c52bec97c61fd34ffa8721fce82ce823005793592e2a80bf71632cd0/data':
+        'community.wave.seqera.io/library/bcftools:1.21--4335bec1d7b44d11' }"
+
+    input:
+    tuple val(meta), path(bam), path(intervals)
+    tuple val(meta2), path(fasta)
+    val save_mpileup
+
+    output:
+    tuple val(meta), path("*vcf.gz")     , emit: vcf
+    tuple val(meta), path("*vcf.gz.tbi") , emit: tbi
+    tuple val(meta), path("*stats.txt")  , emit: stats
+    tuple val(meta), path("*.mpileup.gz"), emit: mpileup, optional: true
+    path "versions.yml"                  , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def mpileup_args = task.ext.args ?: ''
+    def call_args = task.ext.args2 ?: ''
+    def view_args = task.ext.args3 ?: ''
+    def out_prefix = task.ext.prefix ?: "${meta.id}"
+
+    // When requested, tee the mpileup output to a file and bgzip it after the pipeline finishes
+    def tee_mpileup = ''
+    def bgzip_mpileup = ''
+    if (save_mpileup) {
+        tee_mpileup = "| tee ${out_prefix}.mpileup"
+        bgzip_mpileup = "bgzip ${out_prefix}.mpileup"
+    }
+    def targets_opt = intervals ? "-T ${intervals}" : ""
+    """
+    echo "${meta.id}" > sample_name.list
+
+    bcftools \\
+        mpileup \\
+        --fasta-ref ${fasta} \\
+        ${mpileup_args} \\
+        ${bam} \\
+        ${targets_opt} \\
+        ${tee_mpileup} \\
+        | bcftools call --output-type v ${call_args} \\
+        | bcftools reheader --samples sample_name.list \\
+        | bcftools view --output-file ${out_prefix}.vcf.gz --output-type z ${view_args}
+
+    ${bgzip_mpileup}
+
+    tabix -p vcf -f ${out_prefix}.vcf.gz
+
+    bcftools stats ${out_prefix}.vcf.gz > ${out_prefix}.bcftools_stats.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def out_prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${out_prefix}.bcftools_stats.txt
+    echo "" | gzip > ${out_prefix}.vcf.gz
+    touch ${out_prefix}.vcf.gz.tbi
+    echo "" | gzip > ${out_prefix}.mpileup.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git
a/modules/nf-core/bcftools/mpileup/meta.yml b/modules/nf-core/bcftools/mpileup/meta.yml new file mode 100644 index 0000000000..febcb33f60 --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/meta.yml @@ -0,0 +1,93 @@ +name: bcftools_mpileup +description: Call variants from a BAM file with bcftools mpileup and bcftools call +keywords: + - variant calling + - mpileup + - VCF +tools: + - mpileup: + description: | + Generates genotype likelihoods at each genomic position with coverage. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Input BAM file + pattern: "*.{bam}" + - intervals: + type: file + description: Input intervals file. A file (commonly '.bed') containing regions + to subset + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" + - - save_mpileup: + type: boolean + description: Save mpileup file generated by bcftools mpileup +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*vcf.gz": + type: file + description: VCF gzipped output file + pattern: "*.{vcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*vcf.gz.tbi": + type: file + description: tabix index file + pattern: "*.{vcf.gz.tbi}" + - stats: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*stats.txt": + type: file + description: Text output file containing stats + pattern: "*{stats.txt}" + - mpileup: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.mpileup.gz": + type: file + description: mpileup gzipped output for all positions + pattern: "{*.mpileup.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/norm/environment.yml b/modules/nf-core/bcftools/norm/environment.yml new file mode 100644 index 0000000000..557488607c --- /dev/null +++ b/modules/nf-core/bcftools/norm/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.21
diff --git a/modules/nf-core/bcftools/norm/main.nf b/modules/nf-core/bcftools/norm/main.nf
new file mode 100644
index 0000000000..3ad9b35cc2
--- /dev/null
+++ b/modules/nf-core/bcftools/norm/main.nf
@@ -0,0 +1,92 @@
+// Normalize a VCF/BCF against a reference FASTA with `bcftools norm` (compressed VCF by default).
+process BCFTOOLS_NORM {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/5a/5acacb55c52bec97c61fd34ffa8721fce82ce823005793592e2a80bf71632cd0/data':
+        'community.wave.seqera.io/library/bcftools:1.21--4335bec1d7b44d11' }"
+
+    input:
+    tuple val(meta), path(vcf), path(tbi)
+    tuple val(meta2), path(fasta)
+
+    output:
+    tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf
+    tuple val(meta), path("*.tbi")                    , emit: tbi, optional: true
+    tuple val(meta), path("*.csi")                    , emit: csi, optional: true
+    path "versions.yml"                               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def norm_args = task.ext.args ?: '--output-type z'
+    def out_prefix = task.ext.prefix ?: "${meta.id}"
+
+    // Output suffix follows the requested output type; compressed VCF when none is recognised
+    def out_ext = 'vcf.gz'
+    if (norm_args.contains("--output-type b") || norm_args.contains("-Ob")) {
+        out_ext = 'bcf.gz'
+    } else if (norm_args.contains("--output-type u") || norm_args.contains("-Ou")) {
+        out_ext = 'bcf'
+    } else if (norm_args.contains("--output-type z") || norm_args.contains("-Oz")) {
+        out_ext = 'vcf.gz'
+    } else if (norm_args.contains("--output-type v") || norm_args.contains("-Ov")) {
+        out_ext = 'vcf'
+    }
+
+    """
+    bcftools norm \\
+        --fasta-ref ${fasta} \\
+        --output ${out_prefix}.${out_ext} \\
+        ${norm_args} \\
+        --threads ${task.cpus} \\
+        ${vcf}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def norm_args = task.ext.args ?: '--output-type z'
+    def out_prefix = task.ext.prefix ?: "${meta.id}"
+
+    def out_ext = 'vcf.gz'
+    if (norm_args.contains("--output-type b") || norm_args.contains("-Ob")) {
+        out_ext = 'bcf.gz'
+    } else if (norm_args.contains("--output-type u") || norm_args.contains("-Ou")) {
+        out_ext = 'bcf'
+    } else if (norm_args.contains("--output-type z") || norm_args.contains("-Oz")) {
+        out_ext = 'vcf.gz'
+    } else if (norm_args.contains("--output-type v") || norm_args.contains("-Ov")) {
+        out_ext = 'vcf'
+    }
+
+    // Plain VCF never gets an index; TBI is only used for vcf.gz, any other index request yields CSI
+    def wants_tbi = norm_args.contains('--write-index=tbi') || norm_args.contains('-W=tbi')
+    def wants_index = norm_args.contains('--write-index') || norm_args.contains('-W')
+    def indexable = out_ext in ['vcf.gz', 'bcf', 'bcf.gz']
+    def index_ext = ''
+    if (indexable && wants_tbi && out_ext == 'vcf.gz') {
+        index_ext = 'tbi'
+    } else if (indexable && wants_index) {
+        index_ext = 'csi'
+    }
+
+    def create_cmd = out_ext.endsWith(".gz") ? "echo '' | gzip >" : "touch"
+    def create_index = index_ext ? "touch ${out_prefix}.${out_ext}.${index_ext}" : ""
+
+    """
+    ${create_cmd} ${out_prefix}.${out_ext}
+    ${create_index}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/bcftools/norm/meta.yml b/modules/nf-core/bcftools/norm/meta.yml new file mode 100644 index 0000000000..b6edeb4aae --- /dev/null +++ b/modules/nf-core/bcftools/norm/meta.yml @@ -0,0 +1,85 @@ +name: bcftools_norm +description: Normalize VCF file +keywords: + - normalize + - norm + - variant calling + - VCF +tools: + - norm: + description: | + Normalize VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be normalized + e.g. 'file1.vcf' + pattern: "*.{vcf,vcf.gz}" + - tbi: + type: file + description: | + An optional index of the VCF file (for when the VCF is compressed) + pattern: "*.vcf.gz.tbi" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g.
[ id:'genome' ] + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: One of uncompressed VCF (.vcf), compressed VCF (.vcf.gz), compressed + BCF (.bcf.gz) or uncompressed BCF (.bcf) normalized output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@ramprasadn" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/sort/environment.yml b/modules/nf-core/bcftools/sort/environment.yml new file mode 100644 index 0000000000..557488607c --- /dev/null +++ b/modules/nf-core/bcftools/sort/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.21 diff --git a/modules/nf-core/bcftools/sort/main.nf b/modules/nf-core/bcftools/sort/main.nf new file mode 100644 index 0000000000..6cbd09b593 --- /dev/null +++ b/modules/nf-core/bcftools/sort/main.nf @@ -0,0 +1,70 @@ +process BCFTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/5a/5acacb55c52bec97c61fd34ffa8721fce82ce823005793592e2a80bf71632cd0/data': + 'community.wave.seqera.io/library/bcftools:1.21--4335bec1d7b44d11' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + + """ + bcftools \\ + sort \\ + --output ${prefix}.${extension} \\ + --temp-dir . \\ + $args \\ + $vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def create_cmd = extension.endsWith(".gz") ? 
"echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index}" : "" + + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/sort/meta.yml b/modules/nf-core/bcftools/sort/meta.yml new file mode 100644 index 0000000000..f7a6eff17d --- /dev/null +++ b/modules/nf-core/bcftools/sort/meta.yml @@ -0,0 +1,65 @@ +name: bcftools_sort +description: Sorts VCF files +keywords: + - sorting + - VCF + - variant calling +tools: + - sort: + description: Sort VCF files by coordinates. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + tool_dev_url: https://github.com/samtools/bcftools + doi: "10.1093/bioinformatics/btp352" + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: The VCF/BCF file to be sorted + pattern: "*.{vcf.gz,vcf,bcf}" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: Sorted VCF file + pattern: "*.{vcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Gwennid" +maintainers: + - "@Gwennid" diff --git a/modules/nf-core/bcftools/stats/environment.yml b/modules/nf-core/bcftools/stats/environment.yml new file mode 100644 index 0000000000..7aa06d0f7a --- /dev/null +++ b/modules/nf-core/bcftools/stats/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.21 + - bioconda::htslib=1.21 diff --git a/modules/nf-core/bcftools/stats/main.nf b/modules/nf-core/bcftools/stats/main.nf new file mode 100644 index 0000000000..fb556e0aa5 --- /dev/null +++ b/modules/nf-core/bcftools/stats/main.nf @@ -0,0 +1,60 @@ +process BCFTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/5a/5acacb55c52bec97c61fd34ffa8721fce82ce823005793592e2a80bf71632cd0/data': + 'community.wave.seqera.io/library/bcftools:1.21--4335bec1d7b44d11' }" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(regions) + tuple val(meta3), path(targets) + tuple val(meta4), path(samples) + tuple val(meta5), path(exons) + tuple val(meta6), path(fasta) + + output: + tuple val(meta), path("*stats.txt"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? 
"--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + def reference_fasta = fasta ? "--fasta-ref ${fasta}" : "" + def exons_file = exons ? "--exons ${exons}" : "" + """ + bcftools stats \\ + $args \\ + $regions_file \\ + $targets_file \\ + $samples_file \\ + $reference_fasta \\ + $exons_file \\ + $vcf > ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/stats/meta.yml b/modules/nf-core/bcftools/stats/meta.yml new file mode 100644 index 0000000000..655a61c5f4 --- /dev/null +++ b/modules/nf-core/bcftools/stats/meta.yml @@ -0,0 +1,105 @@ +name: bcftools_stats +description: Generates stats from VCF files +keywords: + - variant calling + - stats + - VCF +tools: + - stats: + description: | + Parses VCF or BCF and produces text file stats which is suitable for + machine processing and can be plotted using plot-vcfstats. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF input file + pattern: "*.{vcf}" + - tbi: + type: file + description: | + The tab index for the VCF file to be inspected. Optional: only required when parameter regions is chosen. + pattern: "*.tbi" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. (VCF, BED or tab-delimited) + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon tbi index files) + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' + - - meta5: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - exons: + type: file + description: | + Tab-delimited file with exons for indel frameshifts (chr,beg,end; 1-based, inclusive, optionally bgzip compressed). + e.g. 'exons.tsv.gz' + - - meta6: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: | + Faidx indexed reference sequence file to determine INDEL context. + e.g. 'reference.fa' +output: + - stats: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*stats.txt": + type: file + description: Text output file containing stats + pattern: "*_{stats.txt}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@SusiJo" + - "@TCLamnidis" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@SusiJo" + - "@TCLamnidis" diff --git a/modules/nf-core/bcftools/view/environment.yml b/modules/nf-core/bcftools/view/environment.yml new file mode 100644 index 0000000000..ba863b388f --- /dev/null +++ b/modules/nf-core/bcftools/view/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 diff --git a/modules/nf-core/bcftools/view/main.nf b/modules/nf-core/bcftools/view/main.nf new file mode 100644 index 0000000000..72b31200b5 --- /dev/null +++ b/modules/nf-core/bcftools/view/main.nf @@ -0,0 +1,75 @@ +process BCFTOOLS_VIEW { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data': + 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f' }" + + input: + tuple val(meta), path(vcf), path(index) + path(regions) + path(targets) + path(samples) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + """ + bcftools view \\ + --output ${prefix}.${extension} \\ + ${regions_file} \\ + ${targets_file} \\ + ${samples_file} \\ + $args \\ + --threads $task.cpus \\ + ${vcf} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? 
"vcf" : + "vcf" + def stub_index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && stub_index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${stub_index}" : "" + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/view/meta.yml b/modules/nf-core/bcftools/view/meta.yml new file mode 100644 index 0000000000..e9df974206 --- /dev/null +++ b/modules/nf-core/bcftools/view/meta.yml @@ -0,0 +1,98 @@ +name: bcftools_view +description: View, subset and filter VCF or BCF files by position and filtering expression. + Convert between VCF and BCF +keywords: + - variant calling + - view + - bcftools + - VCF +tools: + - view: + description: | + View, subset and filter VCF or BCF files by position and filtering expression. Convert between VCF and BCF + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be inspected. + e.g. 'file.vcf' + ontologies: [] + - index: + type: file + description: | + The tab index for the VCF file to be inspected. + e.g. 'file.tbi' + ontologies: [] + - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. + e.g. 
'file.vcf' + ontologies: [] + - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon index files) + e.g. 'file.vcf' + ontologies: [] + - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: Output VCF/BCF file after viewing, subsetting or filtering + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + ontologies: [] + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@abhi18av" +maintainers: + - "@abhi18av" diff --git a/modules/nf-core/bwa/index/environment.yml b/modules/nf-core/bwa/index/environment.yml new file mode 100644 index 0000000000..ed5448a197 --- /dev/null +++ b/modules/nf-core/bwa/index/environment.yml @@ -0,0 +1,13 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + # renovate: datasource=conda depName=bioconda/bwa + - bioconda::bwa=0.7.18 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.21 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.21 diff --git 
a/modules/nf-core/bwa/index/main.nf b/modules/nf-core/bwa/index/main.nf new file mode 100644 index 0000000000..72e078aac2 --- /dev/null +++ b/modules/nf-core/bwa/index/main.nf @@ -0,0 +1,55 @@ +process BWA_INDEX { + tag "$fasta" + // NOTE requires 5.37N memory where N is the size of the database + // source: https://bio-bwa.sourceforge.net/bwa.shtml#8 + memory { 6.B * fasta.size() } + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/bf/bf7890f8d4e38a7586581cb7fa13401b7af1582f21d94eef969df4cea852b6da/data' : + 'community.wave.seqera.io/library/bwa_htslib_samtools:56c9f8d5201889a4' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("bwa") , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${fasta.baseName}" + def args = task.ext.args ?: '' + """ + mkdir bwa + bwa \\ + index \\ + $args \\ + -p bwa/${prefix} \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${fasta.baseName}" + """ + mkdir bwa + + touch bwa/${prefix}.amb + touch bwa/${prefix}.ann + touch bwa/${prefix}.bwt + touch bwa/${prefix}.pac + touch bwa/${prefix}.sa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwa/index/meta.yml b/modules/nf-core/bwa/index/meta.yml new file mode 100644 index 0000000000..1781586fa1 --- /dev/null +++ b/modules/nf-core/bwa/index/meta.yml @@ -0,0 +1,58 @@ +name: bwa_index +description: Create BWA index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - bwa: + 
description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: https://bio-bwa.sourceforge.net/bwa.shtml + arxiv: arXiv:1303.3997 + licence: ["GPL-3.0-or-later"] + identifier: "biotools:bwa" +input: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file + ontologies: + - edam: "http://edamontology.org/data_2044" # Sequence + - edam: "http://edamontology.org/format_1929" # FASTA +output: + index: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - bwa: + type: directory + description: | + Directory containing the BWA index files. + e.g. bwa/genome.{amb,ann,bwt,pac,sa} + pattern: "*.{amb,ann,bwt,pac,sa}" + ontologies: + - edam: "http://edamontology.org/data_3210" # Genome index + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@drpatelh" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@maxulysse" + - "@gallvp" diff --git a/modules/nf-core/bwa/mem/environment.yml b/modules/nf-core/bwa/mem/environment.yml new file mode 100644 index 0000000000..ed5448a197 --- /dev/null +++ b/modules/nf-core/bwa/mem/environment.yml @@ -0,0 +1,13 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + # renovate: datasource=conda depName=bioconda/bwa + - bioconda::bwa=0.7.18 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.21 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.21 diff --git 
a/modules/nf-core/bwa/mem/main.nf b/modules/nf-core/bwa/mem/main.nf new file mode 100644 index 0000000000..3c54417824 --- /dev/null +++ b/modules/nf-core/bwa/mem/main.nf @@ -0,0 +1,74 @@ +process BWA_MEM { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/bf/bf7890f8d4e38a7586581cb7fa13401b7af1582f21d94eef969df4cea852b6da/data' : + 'community.wave.seqera.io/library/bwa_htslib_samtools:56c9f8d5201889a4' }" + + input: + tuple val(meta) , path(reads) + tuple val(meta2), path(index) + tuple val(meta3), path(fasta) + val sort_bam + + output: + tuple val(meta), path("*.bam") , emit: bam, optional: true + tuple val(meta), path("*.cram") , emit: cram, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + tuple val(meta), path("*.crai") , emit: crai, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 'sort' : 'view' + def extension = args2.contains("--output-fmt sam") ? "sam" : + args2.contains("--output-fmt cram") ? "cram": + sort_bam && args2.contains("-O cram")? "cram": + !sort_bam && args2.contains("-C") ? "cram": + "bam" + def reference = fasta && extension=="cram" ? 
"--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + bwa mem \\ + $args \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + | samtools $samtools_command $args2 ${reference} --threads $task.cpus -o ${prefix}.${extension} - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args2.contains("--output-fmt sam") ? "sam" : + args2.contains("--output-fmt cram") ? "cram": + sort_bam && args2.contains("-O cram")? "cram": + !sort_bam && args2.contains("-C") ? "cram": + "bam" + """ + touch ${prefix}.${extension} + touch ${prefix}.csi + touch ${prefix}.crai + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwa/mem/meta.yml b/modules/nf-core/bwa/mem/meta.yml new file mode 100644 index 0000000000..b6f696c03b --- /dev/null +++ b/modules/nf-core/bwa/mem/meta.yml @@ -0,0 +1,111 @@ +name: bwa_mem +description: Performs fastq alignment to a fasta reference using BWA +keywords: + - mem + - bwa + - alignment + - map + - fastq + - bam + - sam +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. 
+ homepage: http://bio-bwa.sourceforge.net/ + documentation: https://bio-bwa.sourceforge.net/bwa.shtml + arxiv: arXiv:1303.3997 + licence: ["GPL-3.0-or-later"] + identifier: "biotools:bwa" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + ontologies: + - edam: "http://edamontology.org/data_2044" # Sequence + - edam: "http://edamontology.org/format_1930" # FASTQ + - - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - index: + type: file + description: Directory containing BWA genome index files + pattern: "*.{amb,ann,bwt,pac,sa}" + ontologies: + - edam: "http://edamontology.org/data_3210" # Genome index + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test', single_end:false ] + - fasta: + type: file + description: Reference genome in FASTA format + pattern: "*.{fasta,fa}" + ontologies: + - edam: "http://edamontology.org/data_2044" # Sequence + - edam: "http://edamontology.org/format_1929" # FASTA + - - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - bam: + - meta: + type: map + description: Groovy Map containing sample information, e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + - cram: + - meta: + type: map + description: Groovy Map containing sample information, e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Output CRAM file containing read alignments + pattern: "*.{cram}" + ontologies: + - edam: "http://edamontology.org/format_3462" # CRAM + - csi: + - meta: + type: map + description: Groovy Map containing sample information, e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Optional index file for BAM file + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: Groovy Map containing sample information, e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: Optional index file for CRAM file + pattern: "*.{crai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@jeremy1805" + - "@matthdsm" +maintainers: + - "@drpatelh" + - "@jeremy1805" + - "@matthdsm" diff --git a/modules/nf-core/bwamem2/index/environment.yml b/modules/nf-core/bwamem2/index/environment.yml new file mode 100644 index 0000000000..c069e281ac --- /dev/null +++ b/modules/nf-core/bwamem2/index/environment.yml @@ -0,0 +1,13 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + # renovate: datasource=conda 
depName=bioconda/bwa-mem2 + - bwa-mem2=2.2.1 + # renovate: datasource=conda depName=bioconda/htslib + - htslib=1.21 + # renovate: datasource=conda depName=bioconda/samtools + - samtools=1.21 diff --git a/modules/nf-core/bwamem2/index/main.nf b/modules/nf-core/bwamem2/index/main.nf new file mode 100644 index 0000000000..529c66e8fd --- /dev/null +++ b/modules/nf-core/bwamem2/index/main.nf @@ -0,0 +1,55 @@ +process BWAMEM2_INDEX { + tag "$fasta" + // NOTE Requires 28N GB memory where N is the size of the reference sequence, floor of 280M + // source: https://github.com/bwa-mem2/bwa-mem2/issues/9 + memory { (280.MB * Math.ceil(fasta.size() / 10000000)) * task.attempt } + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9a/9ac054213e67b3c9308e409b459080bbe438f8fd6c646c351bc42887f35a42e7/data' : + 'community.wave.seqera.io/library/bwa-mem2_htslib_samtools:e1f420694f8e42bd' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("bwamem2"), emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${fasta}" + def args = task.ext.args ?: '' + """ + mkdir bwamem2 + bwa-mem2 \\ + index \\ + $args \\ + -p bwamem2/${prefix} \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${fasta}" + + """ + mkdir bwamem2 + touch bwamem2/${prefix}.0123 + touch bwamem2/${prefix}.ann + touch bwamem2/${prefix}.pac + touch bwamem2/${prefix}.amb + touch bwamem2/${prefix}.bwt.2bit.64 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') + END_VERSIONS + """ +} diff --git 
a/modules/nf-core/bwamem2/index/meta.yml b/modules/nf-core/bwamem2/index/meta.yml new file mode 100644 index 0000000000..b2aa45fb65 --- /dev/null +++ b/modules/nf-core/bwamem2/index/meta.yml @@ -0,0 +1,52 @@ +name: bwamem2_index +description: Create BWA-mem2 index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - bwamem2: + description: | + BWA-mem2 is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/bwa-mem2/bwa-mem2 + documentation: https://github.com/bwa-mem2/bwa-mem2#usage + licence: ["MIT"] + identifier: "biotools:bwa-mem2" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file + ontologies: + - edam: "http://edamontology.org/data_2044" # Sequence + - edam: "http://edamontology.org/format_1929" # FASTA +output: + index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bwamem2: + type: file + description: BWA genome index files + pattern: "*.{0123,amb,ann,bwt.2bit.64,pac}" + ontologies: + - edam: "http://edamontology.org/data_3210" # Genome index + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/bwamem2/mem/environment.yml b/modules/nf-core/bwamem2/mem/environment.yml new file mode 100644 index 0000000000..c069e281ac --- /dev/null +++ b/modules/nf-core/bwamem2/mem/environment.yml @@ -0,0 +1,13 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + # renovate: datasource=conda depName=bioconda/bwa-mem2 + - bwa-mem2=2.2.1 + # renovate: datasource=conda depName=bioconda/htslib + - htslib=1.21 + # renovate: datasource=conda depName=bioconda/samtools + - samtools=1.21 diff --git a/modules/nf-core/bwamem2/mem/main.nf b/modules/nf-core/bwamem2/mem/main.nf new file mode 100644 index 0000000000..eab662a87a --- /dev/null +++ b/modules/nf-core/bwamem2/mem/main.nf @@ -0,0 +1,83 @@ +process BWAMEM2_MEM { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9a/9ac054213e67b3c9308e409b459080bbe438f8fd6c646c351bc42887f35a42e7/data' : + 'community.wave.seqera.io/library/bwa-mem2_htslib_samtools:e1f420694f8e42bd' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(index) + tuple val(meta3), path(fasta) + val sort_bam + + output: + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram, optional:true + tuple val(meta), path("*.crai") , emit: crai, optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 'sort' : 'view' + + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? 
"--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + bwa-mem2 \\ + mem \\ + $args \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + | samtools $samtools_command $args2 -@ $task.cpus ${reference} -o ${prefix}.${extension} - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } + + """ + touch ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwamem2/mem/meta.yml b/modules/nf-core/bwamem2/mem/meta.yml new file mode 100644 index 0000000000..d17e0dbd3a --- /dev/null +++ b/modules/nf-core/bwamem2/mem/meta.yml @@ -0,0 +1,129 @@ +name: bwamem2_mem +description: Performs fastq alignment to a fasta reference using BWA +keywords: + - mem + - bwa + - alignment + - map + - fastq + - bam + - sam +tools: + - bwa: + description: | + BWA-mem2 is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. 
+ homepage: https://github.com/bwa-mem2/bwa-mem2 + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: ["MIT"] + identifier: "biotools:bwa-mem2" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + ontologies: + - edam: "http://edamontology.org/data_2044" # Sequence + - edam: "http://edamontology.org/format_1930" # FASTQ + - - meta2: + type: map + description: | + Groovy Map containing reference/index information + e.g. [ id:'test' ] + - index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{0123,amb,ann,bwt.2bit.64,pac}" + ontologies: + - edam: "http://edamontology.org/data_3210" # Genome index + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome in FASTA format + pattern: "*.{fa,fasta,fna}" + ontologies: + - edam: "http://edamontology.org/data_2044" # Sequence + - edam: "http://edamontology.org/format_1929" # FASTA + - - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - sam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.sam": + type: file + description: Output SAM file containing read alignments + pattern: "*.{sam}" + ontologies: + - edam: "http://edamontology.org/format_2573" # SAM + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Output CRAM file containing read alignments + pattern: "*.{cram}" + ontologies: + - edam: "http://edamontology.org/format_3462" # CRAM + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: Index file for CRAM file + pattern: "*.{crai}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Index file for BAM file + pattern: "*.{csi}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" + - "@matthdsm" +maintainers: + - "@maxulysse" + - "@matthdsm" diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml new file mode 100644 index 0000000000..50c2059afb --- /dev/null +++ b/modules/nf-core/cat/cat/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf new file mode 100644 index 0000000000..2862c64cd9 --- /dev/null +++ b/modules/nf-core/cat/cat/main.nf @@ -0,0 +1,78 @@ +process CAT_CAT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : + 'biocontainers/pigz:2.3.4' }" + + input: + tuple val(meta), path(files_in) + + output: + tuple val(meta), path("${prefix}"), emit: file_out + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + + // choose appropriate concatenation tool depending on input and output format + + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + $command1 \\ + $args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." 
+ } + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} + +// return the suffix of filename; for .gz files also include the second to last extension if present (e.g. .fasta.gz); '' when there is no extension +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? match[0][1] : (filename.contains('.') ? filename.substring(filename.lastIndexOf('.')) : '') +} diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml new file mode 100644 index 0000000000..81778a0671 --- /dev/null +++ b/modules/nf-core/cat/cat/meta.yml @@ -0,0 +1,43 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + documentation: https://man7.org/linux/man-pages/man1/cat.1.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" +output: + - file_out: + - meta: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - ${prefix}: + type: file + description: Concatenated file. 
Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml new file mode 100644 index 0000000000..9b926b1ffa --- /dev/null +++ b/modules/nf-core/cat/fastq/environment.yml @@ -0,0 +1,12 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::coreutils=9.5 + - conda-forge::grep=3.11 + - conda-forge::gzip=1.13 + - conda-forge::lbzip2=2.5 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf new file mode 100644 index 0000000000..acfb6d0e62 --- /dev/null +++ b/modules/nf-core/cat/fastq/main.nf @@ -0,0 +1,88 @@ +process CAT_FASTQ { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/52ccce28d2ab928ab862e25aae26314d69c8e38bd41ca9431c67ef05221348aa/data' + : 'community.wave.seqera.io/library/coreutils_grep_gzip_lbzip2_pruned:838ba80435a629f8'}" + + input: + tuple val(meta), path(reads, stageAs: "input*/*") + + output: + tuple val(meta), path("*.merged.fastq.gz"), emit: reads + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? 
reads.collect { it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size >= 1) { + """ + cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } else { + error("Could not find any FASTQ files to concatenate in the process input") + } + } + else { + if (readList.size >= 2) { + def read1 = [] + def read2 = [] + readList.eachWithIndex { v, ix -> (ix & 1 ? read2 : read1) << v } + """ + cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz + cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } else { + error("Could not find any FASTQ file pairs to concatenate in the process input") + } + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? 
reads.collect { it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size >= 1) { + """ + echo '' | gzip > ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } else { + error("Could not find any FASTQ files to concatenate in the process input") + } + } + else { + if (readList.size >= 2) { + """ + echo '' | gzip > ${prefix}_1.merged.fastq.gz + echo '' | gzip > ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } else { + error("Could not find any FASTQ file pairs to concatenate in the process input") + } + } +} diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml new file mode 100644 index 0000000000..91ff2fb5f6 --- /dev/null +++ b/modules/nf-core/cat/fastq/meta.yml @@ -0,0 +1,45 @@ +name: cat_fastq +description: Concatenates fastq files +keywords: + - cat + - fastq + - concatenate +tools: + - cat: + description: | + The cat utility reads files sequentially, writing them to the standard output. + documentation: https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files to be concatenated. +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.merged.fastq.gz": + type: file + description: Merged fastq file + pattern: "*.{merged.fastq.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/cnvkit/antitarget/environment.yml b/modules/nf-core/cnvkit/antitarget/environment.yml new file mode 100644 index 0000000000..9b3082be06 --- /dev/null +++ b/modules/nf-core/cnvkit/antitarget/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cnvkit=0.9.11 diff --git a/modules/nf-core/cnvkit/antitarget/main.nf b/modules/nf-core/cnvkit/antitarget/main.nf new file mode 100644 index 0000000000..0c1f6674a4 --- /dev/null +++ b/modules/nf-core/cnvkit/antitarget/main.nf @@ -0,0 +1,36 @@ +process CNVKIT_ANTITARGET { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.11--pyhdfd78af_0': + 'biocontainers/cnvkit:0.9.11--pyhdfd78af_0' }" + + input: + tuple val(meta), path(targets) + + output: + tuple val(meta), path("*.bed"), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + cnvkit.py \\ + antitarget \\ + $targets \\ + --output ${prefix}.antitarget.bed \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/cnvkit/antitarget/meta.yml b/modules/nf-core/cnvkit/antitarget/meta.yml new file mode 100644 index 0000000000..13f12a10c0 --- /dev/null +++ b/modules/nf-core/cnvkit/antitarget/meta.yml @@ -0,0 +1,52 @@ +name: cnvkit_antitarget +description: Derive off-target (“antitarget”) bins from target regions. +keywords: + - cnvkit + - antitarget + - cnv + - copy number +tools: + - cnvkit: + description: | + CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. + It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent. + homepage: https://cnvkit.readthedocs.io/en/stable/index.html + documentation: https://cnvkit.readthedocs.io/en/stable/index.html + tool_dev_url: "https://github.com/etal/cnvkit" + doi: 10.1371/journal.pcbi.1004873 + licence: ["Apache-2.0"] + identifier: biotools:cnvkit +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - targets: + type: file + description: File containing genomic regions + pattern: "*.{bed}" +output: + - bed: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.bed": + type: file + description: File containing off-target regions + pattern: "*.{bed}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@adamrtalbot" + - "@priesgo" + - "@SusiJo" +maintainers: + - "@adamrtalbot" + - "@priesgo" + - "@SusiJo" diff --git a/modules/nf-core/cnvkit/batch/environment.yml b/modules/nf-core/cnvkit/batch/environment.yml new file mode 100644 index 0000000000..a2466da99f --- /dev/null +++ b/modules/nf-core/cnvkit/batch/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::cnvkit=0.9.10 + - bioconda::htslib=1.17 + - bioconda::samtools=1.17 diff --git a/modules/nf-core/cnvkit/batch/main.nf b/modules/nf-core/cnvkit/batch/main.nf new file mode 100644 index 0000000000..9e8aafac65 --- /dev/null +++ b/modules/nf-core/cnvkit/batch/main.nf @@ -0,0 +1,110 @@ +process CNVKIT_BATCH { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:c94363856059151a2974dc501fb07a0360cc60a3-0' : + 'biocontainers/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:c94363856059151a2974dc501fb07a0360cc60a3-0' }" + + input: + tuple val(meta), path(tumor), path(normal) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fasta_fai) + tuple val(meta4), path(targets) + tuple val(meta5), path(reference) + val panel_of_normals + + output: + tuple val(meta), path("*.bed"), emit: bed + tuple val(meta), path("*.cnn"), emit: cnn, optional: true + tuple val(meta), path("*.cnr"), emit: cnr, optional: true + tuple val(meta), path("*.cns"), emit: cns, optional: true + tuple val(meta), path("*.pdf"), emit: pdf, optional: true + tuple val(meta), path("*.png"), emit: png, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def tumor_exists = tumor ? true : false + def normal_exists = normal ? true : false + def reference_exists = reference ? true : false + + // execute samtools only when cram files are input: cnvkit works natively on bam, while reading cram directly is prohibitively slow, so cram is converted to bam first + def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false + def normal_cram = normal_exists && normal.Extension == "cram" ? true : false + def tumor_bam = tumor_exists && tumor.Extension == "bam" ? true : false + def normal_bam = normal_exists && normal.Extension == "bam" ? true : false + + def tumor_out = tumor_cram ? tumor.BaseName + ".bam" : "${tumor}" + + // tumor_only mode does not need fasta & target + // instead it requires a pre-computed reference.cnn which is built from fasta & target + def (normal_out, normal_args, fasta_args) = ["", "", ""] + def fai_reference = fasta_fai ? "--fai-reference ${fasta_fai}" : "" + + if (normal_exists){ + def normal_prefix = normal.BaseName + normal_out = normal_cram ? 
"${normal_prefix}" + ".bam" : "${normal}" + fasta_args = fasta ? "--fasta $fasta" : "" + + // germline mode + // normal samples must be input without a flag + // requires flag --normal to be empty [] + if(!tumor_exists){ + tumor_out = "${normal_prefix}" + ".bam" + normal_args = "--normal " + } + // somatic mode + else { + normal_args = normal_prefix ? "--normal $normal_out" : "" + } + if (reference_exists){ + fasta_args = "" + normal_args = "" + } + } + + // generation of panel of normals + def generate_pon = panel_of_normals ? true : false + + if (generate_pon && !tumor_exists){ + def pon_input = normal.join(' ') + normal_args = "--normal $pon_input" + tumor_out = "" + } + + def target_args = targets && !reference_exists ? "--targets $targets" : "" + def reference_args = reference ? "--reference $reference" : "" + + def samtools_cram_convert = '' + samtools_cram_convert += normal_cram ? " samtools view -T $fasta $fai_reference $normal -@ $task.cpus -o $normal_out\n" : '' + samtools_cram_convert += normal_cram ? " samtools index $normal_out\n" : '' + samtools_cram_convert += tumor_cram ? " samtools view -T $fasta $fai_reference $tumor -@ $task.cpus -o $tumor_out\n" : '' + samtools_cram_convert += tumor_cram ? " samtools index $tumor_out\n" : '' + def versions = normal_cram || tumor_cram ? 
+ "samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')\n cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g')" : + "cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g')" + """ + $samtools_cram_convert + + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ${versions} + END_VERSIONS + """ +} diff --git a/modules/nf-core/cnvkit/batch/meta.yml b/modules/nf-core/cnvkit/batch/meta.yml new file mode 100644 index 0000000000..30f7a1a29b --- /dev/null +++ b/modules/nf-core/cnvkit/batch/meta.yml @@ -0,0 +1,155 @@ +name: cnvkit_batch +description: Copy number variant detection from high-throughput sequencing data +keywords: + - cnvkit + - bam + - fasta + - copy number +tools: + - cnvkit: + description: | + CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent. + homepage: https://cnvkit.readthedocs.io/en/stable/index.html + documentation: https://cnvkit.readthedocs.io/en/stable/index.html + licence: ["Apache-2.0"] + identifier: biotools:cnvkit +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tumor: + type: file + description: | + Input tumour sample bam file (or cram) + - normal: + type: file + description: | + Input normal sample bam file (or cram) + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - fasta: + type: file + description: | + Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided) + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta_fai: + type: file + description: | + Input reference genome fasta index (optional, but recommended for cram_input) + - - meta4: + type: map + description: | + Groovy Map containing information about target file + e.g. [ id:'test' ] + - targets: + type: file + description: | + Input target bed file + - - meta5: + type: map + description: | + Groovy Map containing information about reference file + e.g. [ id:'test' ] + - reference: + type: file + description: | + Input reference cnn-file (only for germline and tumor-only running) + - - panel_of_normals: + type: file + description: | + Input panel of normals file +output: + - bed: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bed": + type: file + description: File containing genomic regions + pattern: "*.{bed}" + - cnn: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cnn": + type: file + description: File containing coverage information + pattern: "*.{cnn}" + - cnr: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cnr": + type: file + description: File containing copy number ratio information + pattern: "*.{cnr}" + - cns: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cns": + type: file + description: File containing copy number segment information + pattern: "*.{cns}" + - pdf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.pdf": + type: file + description: File with plot of copy numbers or segments on chromosomes + pattern: "*.{pdf}" + - png: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.png": + type: file + description: File with plot of bin-level log2 coverages and segmentation calls + pattern: "*.{png}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@adamrtalbot" + - "@drpatelh" + - "@fbdtemme" + - "@kaurravneet4123" + - "@KevinMenden" + - "@lassefolkersen" + - "@MaxUlysse" + - "@priesgo" + - "@SusiJo" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@fbdtemme" + - "@kaurravneet4123" + - "@KevinMenden" + - "@lassefolkersen" + - "@MaxUlysse" + - "@priesgo" + - "@SusiJo" diff --git a/modules/nf-core/cnvkit/call/environment.yml b/modules/nf-core/cnvkit/call/environment.yml new file mode 100644 index 0000000000..690d8fd7f7 --- /dev/null +++ b/modules/nf-core/cnvkit/call/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cnvkit=0.9.10 diff --git a/modules/nf-core/cnvkit/call/main.nf b/modules/nf-core/cnvkit/call/main.nf new file mode 100644 index 0000000000..06d51e857e --- /dev/null +++ b/modules/nf-core/cnvkit/call/main.nf @@ -0,0 +1,47 @@ +process CNVKIT_CALL { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.10--pyhdfd78af_0': + 'biocontainers/cnvkit:0.9.10--pyhdfd78af_0' }" + + input: + tuple val(meta) , path(cns), path(vcf) + + output: + tuple val(meta), path("*.cns"), emit: cns + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def vcf_cmd = vcf ? "-v $vcf" : "" + """ + cnvkit.py call \\ + $cns \\ + $vcf_cmd \\ + $args \\ + -o ${prefix}.cns + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.cns + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/cnvkit/call/meta.yml b/modules/nf-core/cnvkit/call/meta.yml new file mode 100644 index 0000000000..b3b4a4a78a --- /dev/null +++ b/modules/nf-core/cnvkit/call/meta.yml @@ -0,0 +1,52 @@ +name: cnvkit_call +description: Given segmented log2 ratio estimates (.cns), derive each segment’s absolute + integer copy number +keywords: + - cnvkit + - bam + - fasta + - copy number +tools: + - cnvkit: + description: | + CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent. + homepage: https://cnvkit.readthedocs.io/en/stable/index.html + documentation: https://cnvkit.readthedocs.io/en/stable/index.html + licence: ["Apache-2.0"] + identifier: biotools:cnvkit +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - cns: + type: file + description: CNVKit CNS file. + pattern: "*.cns" + - vcf: + type: file + description: Germline VCF file for BAF. + pattern: "*.vcf{,.gz}" +output: + - cns: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cns": + type: file + description: CNS file. + pattern: "*.cns" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@adamrtalbot" + - "@priesgo" +maintainers: + - "@adamrtalbot" + - "@priesgo" diff --git a/modules/nf-core/cnvkit/export/environment.yml b/modules/nf-core/cnvkit/export/environment.yml new file mode 100644 index 0000000000..690d8fd7f7 --- /dev/null +++ b/modules/nf-core/cnvkit/export/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cnvkit=0.9.10 diff --git a/modules/nf-core/cnvkit/export/main.nf b/modules/nf-core/cnvkit/export/main.nf new file mode 100644 index 0000000000..d1d7d3415b --- /dev/null +++ b/modules/nf-core/cnvkit/export/main.nf @@ -0,0 +1,47 @@ +process CNVKIT_EXPORT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.10--pyhdfd78af_0': + 'biocontainers/cnvkit:0.9.10--pyhdfd78af_0' }" + + input: + tuple val(meta), path(cns) + + output: + tuple val(meta), path("${prefix}.${suffix}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.args.tokenize(" ")[0] + """ + cnvkit.py export \\ + $args \\ + $cns \\ + -o ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.args.tokenize(" ")[0] + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/cnvkit/export/meta.yml b/modules/nf-core/cnvkit/export/meta.yml new file mode 100644 index 0000000000..d37e41f98f --- /dev/null +++ b/modules/nf-core/cnvkit/export/meta.yml @@ -0,0 +1,49 @@ +name: cnvkit_export +description: Convert copy number ratio tables (.cnr files) or segments (.cns) to another + format. +keywords: + - cnvkit + - copy number + - export +tools: + - cnvkit: + description: | + CNVkit is a Python library and command-line software toolkit to infer and + visualize copy number from high-throughput DNA sequencing data. + It is designed for use with hybrid capture, including both whole-exome and custom + target panels, and short-read sequencing platforms such as Illumina and Ion Torrent. + homepage: https://cnvkit.readthedocs.io/en/stable/index.html + documentation: https://cnvkit.readthedocs.io/en/stable/index.html + licence: ["Apache-2.0"] + identifier: biotools:cnvkit +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - cns: + type: file + description: CNVKit CNS file. + pattern: "*.cns" +output: + - output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${suffix}: + type: file + description: Output file + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@adamrtalbot" + - "@priesgo" +maintainers: + - "@adamrtalbot" + - "@priesgo" diff --git a/modules/nf-core/cnvkit/genemetrics/environment.yml b/modules/nf-core/cnvkit/genemetrics/environment.yml new file mode 100644 index 0000000000..690d8fd7f7 --- /dev/null +++ b/modules/nf-core/cnvkit/genemetrics/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cnvkit=0.9.10 diff --git a/modules/nf-core/cnvkit/genemetrics/main.nf b/modules/nf-core/cnvkit/genemetrics/main.nf new file mode 100644 index 0000000000..825b12bdac --- /dev/null +++ b/modules/nf-core/cnvkit/genemetrics/main.nf @@ -0,0 +1,39 @@ +process CNVKIT_GENEMETRICS { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.10--pyhdfd78af_0': + 'biocontainers/cnvkit:0.9.10--pyhdfd78af_0' }" + + input: + tuple val(meta), path(cnr), path(cns) + + output: + tuple val(meta), path("*.tsv"), emit: tsv + //tuple val(meta), path("*.cnn"), emit: cnn + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def segments = cns ? 
"--segment ${cns}" : "" + + """ + cnvkit.py \\ + genemetrics \\ + $cnr \\ + $segments \\ + --output ${prefix}.tsv \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/cnvkit/genemetrics/meta.yml b/modules/nf-core/cnvkit/genemetrics/meta.yml new file mode 100644 index 0000000000..6b110accc2 --- /dev/null +++ b/modules/nf-core/cnvkit/genemetrics/meta.yml @@ -0,0 +1,63 @@ +name: cnvkit_genemetrics +description: Copy number variant detection from high-throughput sequencing data +keywords: + - cnvkit + - bam + - fasta + - copy number +tools: + - cnvkit: + description: | + CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent. + homepage: https://cnvkit.readthedocs.io/en/stable/index.html + documentation: https://cnvkit.readthedocs.io/en/stable/index.html + licence: ["Apache-2.0"] + identifier: biotools:cnvkit +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - cnr: + type: file + description: CNR file + pattern: "*.cnr" + - cns: + type: file + description: CNS file [Optional] + pattern: "*.cns" +output: + - tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tsv": + type: file + description: TSV file + pattern: "*.tsv" + - cnn: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.cnn": + type: file + description: CNN file + pattern: "*.cnn" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@adamrtalbot" + - "@marrip" + - "@priesgo" +maintainers: + - "@adamrtalbot" + - "@marrip" + - "@priesgo" diff --git a/modules/nf-core/cnvkit/reference/environment.yml b/modules/nf-core/cnvkit/reference/environment.yml new file mode 100644 index 0000000000..9b3082be06 --- /dev/null +++ b/modules/nf-core/cnvkit/reference/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cnvkit=0.9.11 diff --git a/modules/nf-core/cnvkit/reference/main.nf b/modules/nf-core/cnvkit/reference/main.nf new file mode 100644 index 0000000000..857996f111 --- /dev/null +++ b/modules/nf-core/cnvkit/reference/main.nf @@ -0,0 +1,40 @@ +process CNVKIT_REFERENCE { + tag "$fasta" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.11--pyhdfd78af_0': + 'biocontainers/cnvkit:0.9.11--pyhdfd78af_0' }" + + input: + path fasta + path targets + path antitargets + + output: + path "*.cnn" , emit: cnn + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: targets.BaseName + + """ + cnvkit.py \\ + reference \\ + --fasta $fasta \\ + --targets $targets \\ + --antitargets $antitargets \\ + --output ${prefix}.reference.cnn \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/cnvkit/reference/meta.yml b/modules/nf-core/cnvkit/reference/meta.yml new file mode 100644 index 0000000000..965a7b5795 --- /dev/null +++ b/modules/nf-core/cnvkit/reference/meta.yml @@ -0,0 +1,51 @@ +name: cnvkit_reference +description: Compile a coverage reference from the given files (normal samples). +keywords: + - cnvkit + - reference + - cnv + - copy number +tools: + - cnvkit: + description: | + CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. + It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent. 
+ homepage: https://cnvkit.readthedocs.io/en/stable/index.html + documentation: https://cnvkit.readthedocs.io/en/stable/index.html + tool_dev_url: https://github.com/etal/cnvkit + doi: 10.1371/journal.pcbi.1004873 + licence: ["Apache-2.0"] + identifier: biotools:cnvkit +input: + - - fasta: + type: file + description: File containing reference genome + pattern: "*.{fasta}" + - - targets: + type: file + description: File containing genomic regions + pattern: "*.{bed}" + - - antitargets: + type: file + description: File containing off-target genomic regions + pattern: "*.{bed}" +output: + - cnn: + - "*.cnn": + type: file + description: File containing a copy-number reference (required for CNV calling + in tumor_only mode) + pattern: "*.{cnn}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@adamrtalbot" + - "@priesgo" + - "@SusiJo" +maintainers: + - "@adamrtalbot" + - "@priesgo" + - "@SusiJo" diff --git a/modules/nf-core/controlfreec/assesssignificance/controlfreec-assesssignificance.diff b/modules/nf-core/controlfreec/assesssignificance/controlfreec-assesssignificance.diff new file mode 100644 index 0000000000..0e6ac7d8cf --- /dev/null +++ b/modules/nf-core/controlfreec/assesssignificance/controlfreec-assesssignificance.diff @@ -0,0 +1,49 @@ +Changes in component 'nf-core/controlfreec/assesssignificance' +Changes in 'controlfreec/assesssignificance/main.nf': +--- modules/nf-core/controlfreec/assesssignificance/main.nf ++++ modules/nf-core/controlfreec/assesssignificance/main.nf +@@ -4,8 +4,8 @@ + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+- 'https://depot.galaxyproject.org/singularity/control-freec:11.6b--hdbdd923_0' : +- 'biocontainers/control-freec:11.6b--hdbdd923_0' }" ++ 'https://depot.galaxyproject.org/singularity/control-freec:11.6--h1b792b2_1' : ++ 'biocontainers/control-freec:11.6--h1b792b2_1' }" + + input: + tuple val(meta), path(cnvs), path(ratio) +@@ -20,7 +20,7 @@ + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" +- def VERSION = '11.6b' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. ++ def VERSION = '11.6' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + cat \$(which assess_significance.R) | R --slave --args ${cnvs} ${ratio} + +@@ -34,7 +34,7 @@ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" +- def VERSION = '11.6b' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. ++ def VERSION = '11.6' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ """ + touch ${prefix}.p.value.txt + + +'modules/nf-core/controlfreec/assesssignificance/meta.yml' is unchanged +Changes in 'controlfreec/assesssignificance/environment.yml': +--- modules/nf-core/controlfreec/assesssignificance/environment.yml ++++ modules/nf-core/controlfreec/assesssignificance/environment.yml +@@ -4,4 +4,4 @@ + - conda-forge + - bioconda + dependencies: +- - bioconda::control-freec=11.6b ++ - bioconda::control-freec=11.6 + +'modules/nf-core/controlfreec/assesssignificance/tests/main.nf.test.snap' is unchanged +'modules/nf-core/controlfreec/assesssignificance/tests/nextflow.config' is unchanged +'modules/nf-core/controlfreec/assesssignificance/tests/main.nf.test' is unchanged +************************************************************ diff --git a/modules/nf-core/controlfreec/assesssignificance/environment.yml b/modules/nf-core/controlfreec/assesssignificance/environment.yml new file mode 100644 index 0000000000..8912f12f2a --- /dev/null +++ b/modules/nf-core/controlfreec/assesssignificance/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::control-freec=11.6 diff --git a/modules/nf-core/controlfreec/assesssignificance/main.nf b/modules/nf-core/controlfreec/assesssignificance/main.nf new file mode 100644 index 0000000000..9047a634cb --- /dev/null +++ b/modules/nf-core/controlfreec/assesssignificance/main.nf @@ -0,0 +1,47 @@ +process CONTROLFREEC_ASSESSSIGNIFICANCE { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/control-freec:11.6--h1b792b2_1' + : 'biocontainers/control-freec:11.6--h1b792b2_1'}" + + input: + tuple val(meta), path(cnvs), path(ratio) + + output: + tuple val(meta), path("*.p.value.txt"), emit: p_value_txt + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '11.6' + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + cat \$(which assess_significance.R) | R --slave --args ${cnvs} ${ratio} + + mv *.p.value.txt ${prefix}.p.value.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + controlfreec: ${VERSION} + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '11.6' + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.p.value.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + controlfreec: ${VERSION} + END_VERSIONS + """ +} diff --git a/modules/nf-core/controlfreec/assesssignificance/meta.yml b/modules/nf-core/controlfreec/assesssignificance/meta.yml new file mode 100644 index 0000000000..658d21b618 --- /dev/null +++ b/modules/nf-core/controlfreec/assesssignificance/meta.yml @@ -0,0 +1,59 @@ +name: controlfreec_assesssignificance +description: Add both Wilcoxon test and Kolmogorov-Smirnov test p-values to each CNV + output of FREEC +keywords: + - cna + - cnv + - somatic + - single + - tumor-only +tools: + - controlfreec/assesssignificance: + description: Copy number and genotype annotation from whole genome and whole exome + sequencing data. 
+ homepage: http://boevalab.inf.ethz.ch/FREEC + documentation: http://boevalab.inf.ethz.ch/FREEC/tutorial.html + tool_dev_url: https://github.com/BoevaLab/FREEC/ + doi: "10.1093/bioinformatics/btq635" + licence: ["GPL >=2"] + identifier: "" +input: + # Only when we have meta + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - cnvs: + type: file + description: _CNVs file generated by FREEC + pattern: "*._CNVs" + ontologies: [] + - ratio: + type: file + description: ratio file generated by FREEC + pattern: "*.ratio.txt" + ontologies: [] +output: + p_value_txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.p.value.txt": + type: file + description: CNV file containing p_values for each call + pattern: "*.p.value.txt" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/controlfreec/freec/environment.yml b/modules/nf-core/controlfreec/freec/environment.yml new file mode 100644 index 0000000000..3aebc4bde1 --- /dev/null +++ b/modules/nf-core/controlfreec/freec/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::control-freec=11.6b diff --git a/modules/nf-core/controlfreec/freec/main.nf b/modules/nf-core/controlfreec/freec/main.nf new file mode 100644 index 0000000000..a788c9bcd6 --- /dev/null +++ b/modules/nf-core/controlfreec/freec/main.nf @@ -0,0 +1,181 @@ +process CONTROLFREEC_FREEC { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container 
"${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/control-freec:11.6b--hdbdd923_0' + : 'biocontainers/control-freec:11.6b--hdbdd923_0'}" + + input: + tuple val(meta), path(mpileup_normal), path(mpileup_tumor), path(cpn_normal), path(cpn_tumor), path(minipileup_normal), path(minipileup_tumor) + path fasta + path fai + path snp_position + path known_snps + path known_snps_tbi + path chr_directory + path mappability + path target_bed + path gccontent_profile + + output: + tuple val(meta), path("*_ratio.BedGraph"), emit: bedgraph, optional: true + tuple val(meta), path("*_control.cpn"), emit: control_cpn, optional: true + tuple val(meta), path("*_sample.cpn"), emit: sample_cpn + tuple val(meta), path("GC_profile.*.cpn"), emit: gcprofile_cpn, optional: true + tuple val(meta), path("*_BAF.txt"), emit: BAF + tuple val(meta), path("*_CNVs"), emit: CNV + tuple val(meta), path("*_info.txt"), emit: info + tuple val(meta), path("*_ratio.txt"), emit: ratio + tuple val(meta), path("config.txt"), emit: config + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + //"General" configurations + def bedgraphoutput = task.ext.args?.getAt("general")?.getAt("bedgraphoutput") ? "BedGraphOutput = ${task.ext.args["general"]["bedgraphoutput"]}" : "" + def chr_files = chr_directory ? "chrFiles =\${PWD}/${chr_directory}" : "" + def chr_length = fai ? "chrLenFile = \${PWD}/${fai}" : "" + def breakpointthreshold = task.ext.args?.getAt("general")?.getAt("breakpointthreshold") ? "breakPointThreshold = ${task.ext.args["general"]["breakpointthreshold"]}" : "" + def breakpointtype = task.ext.args?.getAt("general")?.getAt("breakpointtype") ? "breakPointType = ${task.ext.args["general"]["breakpointtype"]}" : "" + def coefficientofvariation = task.ext.args?.getAt("general")?.getAt("coefficientofvariation") ? 
"coefficientOfVariation = ${task.ext.args["general"]["coefficientofvariation"]}" : "" + def contamination = task.ext.args?.getAt("general")?.getAt("contamination") ? "contamination = ${task.ext.args["general"]["contamination"]}" : "" + def contaminationadjustment = task.ext.args?.getAt("general")?.getAt("contaminationadjustment") ? "contaminationAdjustment = ${task.ext.args["general"]["contaminationadjustment"]}" : "" + def degree = task.ext.args?.getAt("general")?.getAt("degree") ? "degree = ${task.ext.args["general"]["degree"]}" : "" + def forcegccontentnormalization = task.ext.args?.getAt("general")?.getAt("forcegccontentnormalization") ? "forceGCcontentNormalization = ${task.ext.args["general"]["forcegccontentnormalization"]}" : "" + def gccontentprofile = gccontent_profile ? "GCcontentProfile = ${gccontent_profile}" : "" + def mappability_cmd = mappability ? "gemMappabilityFile = \${PWD}/${mappability}" : "" + def intercept = task.ext.args?.getAt("general")?.getAt("intercept") ? "intercept = ${task.ext.args["general"]["intercept"]}" : "" + def mincnalength = task.ext.args?.getAt("general")?.getAt("mincnalength") ? "minCNAlength = ${task.ext.args["general"]["mincnalength"]}" : "" + def minmappabilityperwindow = task.ext.args?.getAt("general")?.getAt("minmappabilityperwindow") ? "minMappabilityPerWindow = ${task.ext.args["general"]["minmappabilityperwindow"]}" : "" + def minexpectedgc = task.ext.args?.getAt("general")?.getAt("minexpectedgc") ? "minExpectedGC = ${task.ext.args["general"]["minexpectedgc"]}" : "" + def maxexpectedgc = task.ext.args?.getAt("general")?.getAt("maxexpectedgc") ? "maxExpectedGC = ${task.ext.args["general"]["maxexpectedgc"]}" : "" + def minimalsubclonepresence = task.ext.args?.getAt("general")?.getAt("minimalsubclonepresence") ? "minimalSubclonePresence = ${task.ext.args["general"]["minimalsubclonepresence"]}" : "" + def noisydata = task.ext.args?.getAt("general")?.getAt("noisydata") ? 
"noisyData = ${task.ext.args["general"]["noisydata"]}" : "" + def output = task.ext.prefix ? "outputDir = \${PWD}/${task.ext.prefix}" : "" + def ploidy = task.ext.args?.getAt("general")?.getAt("ploidy") ? "ploidy = ${task.ext.args["general"]["ploidy"]}" : "" + def printNA = task.ext.args?.getAt("general")?.getAt("printNA") ? "printNA = ${task.ext.args["general"]["printNA"]}" : "" + def readcountthreshold = task.ext.args?.getAt("general")?.getAt("readcountthreshold") ? "readCountThreshold = ${task.ext.args["general"]["readcountthreshold"]}" : "" + def sex = task.ext.args?.getAt("general")?.getAt("sex") ? "sex = ${task.ext.args["general"]["sex"]}" : "" + def step = task.ext.args?.getAt("general")?.getAt("step") ? "step = ${task.ext.args["general"]["step"]}" : "" + def telocentromeric = task.ext.args?.getAt("general")?.getAt("telocentromeric") ? "telocentromeric = ${task.ext.args["general"]["telocentromeric"]} " : "" + def uniquematch = task.ext.args?.getAt("general")?.getAt("uniquematch") ? "uniqueMatch = ${task.ext.args["general"]["uniquematch"]}" : "" + def window = task.ext.args?.getAt("general")?.getAt("window") ? "window = ${task.ext.args["general"]["window"]}" : "" + + //"Control" configurations + def matefile_normal = mpileup_normal ? "mateFile = \${PWD}/${mpileup_normal}" : "" + def matecopynumberfile_normal = cpn_normal ? "mateCopyNumberFile = \${PWD}/${cpn_normal}" : "" + def minipileup_normal_cmd = minipileup_normal ? "miniPileup = \${PWD}/${minipileup_normal}" : "" + def inputformat_normal = task.ext.args?.getAt("control")?.getAt("inputformat") ? "inputFormat = ${task.ext.args["control"]["inputformat"]}" : "" + def mateorientation_normal = task.ext.args?.getAt("control")?.getAt("mateorientation") ? "mateOrientation = ${task.ext.args["control"]["mateorientation"]}" : "" + + //"Sample" configuration + def matefile_tumor = mpileup_tumor ? "mateFile = \${PWD}/${mpileup_tumor}" : "" + def matecopynumberfile_tumor = cpn_tumor ? 
"mateCopyNumberFile = \${PWD}/${cpn_tumor}" : "" + def minipileup_tumor_cmd = minipileup_tumor ? "miniPileup = \${PWD}/${minipileup_tumor}" : "" + def inputformat_tumor = task.ext.args?.getAt("sample")?.getAt("inputformat") ? "inputFormat = ${task.ext.args["sample"]["inputformat"]}" : "" + def mateorientation_tumor = task.ext.args?.getAt("sample")?.getAt("mateorientation") ? "mateOrientation = ${task.ext.args["sample"]["mateorientation"]}" : "" + + //"BAF" configuration + def makepileup = snp_position ? "makePileup = \${PWD}/${snp_position}" : "" + def fastafile = fasta ? "fastaFile = \${PWD}/${fasta}" : "" + def minimalcoverageperposition = task.ext.args?.getAt("BAF")?.getAt("minimalcoverageperposition") ? "minimalCoveragePerPosition = ${task.ext.args["BAF"]["minimalcoverageperposition"]}" : "" + def minimalqualityperposition = task.ext.args?.getAt("BAF")?.getAt("minimalqualityperposition") ? "minimalQualityPerPosition = ${task.ext.args["BAF"]["minimalqualityperposition"]}" : "" + def shiftinquality = task.ext.args?.getAt("BAF")?.getAt("shiftinquality") ? "shiftInQuality = ${task.ext.args["BAF"]["shiftinquality"]}" : "" + def snpfile = known_snps ? "SNPfile = \$PWD/${known_snps}" : "" + + //"Target" configuration + def target_bed_cmd = target_bed ? "captureRegions = ${target_bed}" : "" + + def VERSION = '11.6b' + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ """ + touch config.txt + + echo "[general]" >> config.txt + echo ${bedgraphoutput} >> config.txt + echo ${breakpointthreshold} >> config.txt + echo ${breakpointtype} >> config.txt + echo ${chr_files} >> config.txt + echo ${chr_length} >> config.txt + echo ${coefficientofvariation} >> config.txt + echo ${contamination} >> config.txt + echo ${contaminationadjustment} >> config.txt + echo ${degree} >> config.txt + echo ${forcegccontentnormalization} >> config.txt + echo ${gccontentprofile} >> config.txt + echo ${mappability_cmd} >> config.txt + echo ${intercept} >> config.txt + echo ${mincnalength} >> config.txt + echo ${minmappabilityperwindow} >> config.txt + echo ${minexpectedgc} >> config.txt + echo ${maxexpectedgc} >> config.txt + echo ${minimalsubclonepresence} >> config.txt + echo "maxThreads = ${task.cpus}" >> config.txt + echo ${noisydata} >> config.txt + echo ${output} >> config.txt + echo ${ploidy} >> config.txt + echo ${printNA} >> config.txt + echo ${readcountthreshold} >> config.txt + echo ${sex} >> config.txt + echo ${step} >> config.txt + echo ${telocentromeric} >> config.txt + echo ${uniquematch} >> config.txt + echo ${window} >> config.txt + + echo "[control]" >> config.txt + echo ${matefile_normal} >> config.txt + echo ${matecopynumberfile_normal} >> config.txt + echo ${minipileup_normal_cmd} >> config.txt + echo ${inputformat_normal} >> config.txt + echo ${mateorientation_normal} >> config.txt + + echo "[sample]" >> config.txt + echo ${matefile_tumor} >> config.txt + echo ${matecopynumberfile_tumor} >> config.txt + echo ${minipileup_tumor_cmd} >> config.txt + echo ${inputformat_tumor} >> config.txt + echo ${mateorientation_tumor} >> config.txt + + echo "[BAF]" >> config.txt + echo ${makepileup} >> config.txt + echo ${fastafile} >> config.txt + echo ${minimalcoverageperposition} >> config.txt + echo ${minimalqualityperposition} >> config.txt + echo ${shiftinquality} >> config.txt + echo ${snpfile} >> config.txt + + echo "[target]" >> config.txt + 
echo ${target_bed_cmd} >> config.txt + + freec -conf config.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + controlfreec: ${VERSION} + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '11.6b' + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}_ratio.BedGraph + touch ${prefix}_sample.cpn + touch GC_profile.${prefix}.cpn + touch ${prefix}_BAF.txt + touch ${prefix}_CNVs + touch ${prefix}_info.txt + touch ${prefix}_ratio.txt + touch config.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + controlfreec: ${VERSION} + END_VERSIONS + """ +} diff --git a/modules/nf-core/controlfreec/freec/meta.yml b/modules/nf-core/controlfreec/freec/meta.yml new file mode 100644 index 0000000000..8f790c9743 --- /dev/null +++ b/modules/nf-core/controlfreec/freec/meta.yml @@ -0,0 +1,212 @@ +name: controlfreec_freec +description: Copy number and genotype annotation from whole genome and whole exome + sequencing data +keywords: + - cna + - cnv + - somatic + - single + - tumor-only +tools: + - controlfreec/freec: + description: Copy number and genotype annotation from whole genome and whole exome + sequencing data. + homepage: http://boevalab.inf.ethz.ch/FREEC + documentation: http://boevalab.inf.ethz.ch/FREEC/tutorial.html + tool_dev_url: https://github.com/BoevaLab/FREEC/ + doi: "10.1093/bioinformatics/btq635" + licence: ["GPL >=2"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - mpileup_normal: + type: file + description: miniPileup file + ontologies: [] + - mpileup_tumor: + type: file + description: miniPileup file + ontologies: [] + - cpn_normal: + type: file + description: Raw copy number profiles (optional) + pattern: "*.cpn" + ontologies: [] + - cpn_tumor: + type: file + description: Raw copy number profiles (optional) + pattern: "*.cpn" + ontologies: [] + - minipileup_normal: + type: file + description: miniPileup file from previous run (optional) + pattern: "*.pileup" + ontologies: [] + - minipileup_tumor: + type: file + description: miniPileup file from previous run (optional) + pattern: "*.pileup" + ontologies: [] + - fasta: + type: file + description: Reference file (optional; required if args 'makePileup' is set) + pattern: "*.{fasta,fna,fa}" + ontologies: [] + - fai: + type: file + description: Fasta index + pattern: "*.fai" + ontologies: [] + - snp_position: + type: file + description: Path to a BED or VCF file with SNP positions to create a mini pileup + file from the initial BAM file provided in mateFile (optional) + pattern: "*.{bed,vcf}" + ontologies: [] + - known_snps: + type: file + description: File with known SNPs + pattern: "*.{vcf,vcf.gz}" + ontologies: [] + - known_snps_tbi: + type: file + description: Index of known_snps + pattern: "*.tbi" + ontologies: [] + - chr_directory: + type: file + description: Path to directory with chromosome fasta files (optional, required + if gccontentprofile is not provided) + pattern: "*/" + ontologies: [] + - mappability: + type: file + description: Contains information of mappable positions (optional) + pattern: "*.gem" + ontologies: [] + - target_bed: + type: file + description: Sorted bed file containing capture regions (optional) + pattern: "*.bed" + ontologies: [] + - gccontent_profile: + type: file + description: File with GC-content profile + ontologies: [] +output: + bedgraph: + - - meta: + type: map + description: | + Groovy Map 
containing sample information + e.g. [ id:'test', single_end:false ] + - "*_ratio.BedGraph": + type: file + description: Bedgraph format for the UCSC genome browser + pattern: ".bedgraph" + ontologies: [] + control_cpn: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_control.cpn": + type: file + description: files with raw copy number profiles + pattern: "*_control.cpn" + ontologies: [] + sample_cpn: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_sample.cpn": + type: file + description: files with raw copy number profiles + pattern: "*_sample.cpn" + ontologies: [] + gcprofile_cpn: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - GC_profile.*.cpn: + type: file + description: file with GC-content profile. + pattern: "GC_profile.*.cpn" + ontologies: [] + BAF: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_BAF.txt": + type: file + description: file B-allele frequencies for each possibly heterozygous SNP + position + pattern: "*_BAF.txt" + ontologies: [] + CNV: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_CNVs": + type: file + description: file with coordinates of predicted copy number alterations. + pattern: "*_CNVs" + ontologies: [] + info: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_info.txt": + type: file + description: parsable file with information about FREEC run + pattern: "*_info.txt" + ontologies: [] + ratio: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*_ratio.txt": + type: file + description: file with ratios and predicted copy number alterations for each + window + pattern: "*_ratio.txt" + ontologies: [] + config: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - config.txt: + type: file + description: Config file used to run Control-FREEC + pattern: "config.txt" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/controlfreec/freec2bed/environment.yml b/modules/nf-core/controlfreec/freec2bed/environment.yml new file mode 100644 index 0000000000..3aebc4bde1 --- /dev/null +++ b/modules/nf-core/controlfreec/freec2bed/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::control-freec=11.6b diff --git a/modules/nf-core/controlfreec/freec2bed/main.nf b/modules/nf-core/controlfreec/freec2bed/main.nf new file mode 100644 index 0000000000..8a1d06d143 --- /dev/null +++ b/modules/nf-core/controlfreec/freec2bed/main.nf @@ -0,0 +1,46 @@ +process CONTROLFREEC_FREEC2BED { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/control-freec:11.6b--hdbdd923_0' + : 'biocontainers/control-freec:11.6b--hdbdd923_0'}" + + input: + tuple val(meta), path(ratio) + + output: + tuple val(meta), path("*.bed"), emit: bed + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '11.6b' + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + freec2bed.pl -f ${ratio} ${args} > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + controlfreec: ${VERSION} + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '11.6b' + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + controlfreec: ${VERSION} + END_VERSIONS + """ +} diff --git a/modules/nf-core/controlfreec/freec2bed/meta.yml b/modules/nf-core/controlfreec/freec2bed/meta.yml new file mode 100644 index 0000000000..4a6bcc370d --- /dev/null +++ b/modules/nf-core/controlfreec/freec2bed/meta.yml @@ -0,0 +1,52 @@ +name: controlfreec_freec2bed +description: Convert Freec ratio output to BED format +keywords: + - cna + - cnv + - somatic + - single + - tumor-only +tools: + - controlfreec: + description: Copy number and genotype annotation from whole genome and whole exome + sequencing data. + homepage: http://boevalab.inf.ethz.ch/FREEC + documentation: http://boevalab.inf.ethz.ch/FREEC/tutorial.html + tool_dev_url: https://github.com/BoevaLab/FREEC/ + doi: "10.1093/bioinformatics/btq635" + licence: ["GPL >=2"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - ratio: + type: file + description: ratio file generated by FREEC + pattern: "*.ratio.txt" + ontologies: [] +output: + bed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bed": + type: file + description: Bed file + pattern: "*.bed" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/controlfreec/freec2circos/environment.yml b/modules/nf-core/controlfreec/freec2circos/environment.yml new file mode 100644 index 0000000000..3aebc4bde1 --- /dev/null +++ b/modules/nf-core/controlfreec/freec2circos/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::control-freec=11.6b diff --git a/modules/nf-core/controlfreec/freec2circos/main.nf b/modules/nf-core/controlfreec/freec2circos/main.nf new file mode 100644 index 0000000000..16cef8bc59 --- /dev/null +++ b/modules/nf-core/controlfreec/freec2circos/main.nf @@ -0,0 +1,46 @@ +process CONTROLFREEC_FREEC2CIRCOS { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/control-freec:11.6b--hdbdd923_0' + : 'biocontainers/control-freec:11.6b--hdbdd923_0'}" + + input: + tuple val(meta), path(ratio) + + output: + tuple val(meta), path("*.circos.txt"), emit: circos + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '11.6b' + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + freec2circos.pl -f ${ratio} ${args} > ${prefix}.circos.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + controlfreec: ${VERSION} + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '11.6b' + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.circos.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + controlfreec: ${VERSION} + END_VERSIONS + """ +} diff --git a/modules/nf-core/controlfreec/freec2circos/meta.yml b/modules/nf-core/controlfreec/freec2circos/meta.yml new file mode 100644 index 0000000000..408c981013 --- /dev/null +++ b/modules/nf-core/controlfreec/freec2circos/meta.yml @@ -0,0 +1,52 @@ +name: controlfreec_freec2circos +description: Format Freec output to circos input format +keywords: + - cna + - cnv + - somatic + - single + - tumor-only +tools: + - controlfreec: + description: Copy number and genotype annotation from whole genome and whole exome + sequencing data. + homepage: http://boevalab.inf.ethz.ch/FREEC + documentation: http://boevalab.inf.ethz.ch/FREEC/tutorial.html + tool_dev_url: https://github.com/BoevaLab/FREEC/ + doi: "10.1093/bioinformatics/btq635" + licence: ["GPL >=2"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ratio: + type: file + description: ratio file generated by FREEC + pattern: "*.ratio.txt" + ontologies: [] +output: + circos: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.circos.txt": + type: file + description: Txt file + pattern: "*.circos.txt" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/controlfreec/makegraph2/environment.yml b/modules/nf-core/controlfreec/makegraph2/environment.yml new file mode 100644 index 0000000000..3aebc4bde1 --- /dev/null +++ b/modules/nf-core/controlfreec/makegraph2/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::control-freec=11.6b diff --git a/modules/nf-core/controlfreec/makegraph2/main.nf b/modules/nf-core/controlfreec/makegraph2/main.nf new file mode 100644 index 0000000000..d813afea66 --- /dev/null +++ b/modules/nf-core/controlfreec/makegraph2/main.nf @@ -0,0 +1,56 @@ +process CONTROLFREEC_MAKEGRAPH2 { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/control-freec:11.6b--hdbdd923_0' + : 'biocontainers/control-freec:11.6b--hdbdd923_0'}" + + input: + tuple val(meta), path(ratio), path(baf) + + output: + tuple val(meta), path("*_BAF.png"), emit: png_baf + tuple val(meta), path("*_ratio.log2.png"), emit: png_ratio_log2 + tuple val(meta), path("*_ratio.png"), emit: png_ratio + + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + def baf_cmd = baf ?: "" + def VERSION = '11.6b' + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + cat \$(which makeGraph2.0.R) | R --slave --args ${args} ${ratio} ${baf_cmd} + + mv *_BAF.txt.png ${prefix}_BAF.png + mv *_ratio.txt.log2.png ${prefix}_ratio.log2.png + mv *_ratio.txt.png ${prefix}_ratio.png + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + controlfreec: ${VERSION} + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '11.6b' + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}_BAF.png + touch ${prefix}_ratio.log2.png + touch ${prefix}_ratio.png + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + controlfreec: ${VERSION} + END_VERSIONS + """ +} diff --git a/modules/nf-core/controlfreec/makegraph2/meta.yml b/modules/nf-core/controlfreec/makegraph2/meta.yml new file mode 100644 index 0000000000..17026f8531 --- /dev/null +++ b/modules/nf-core/controlfreec/makegraph2/meta.yml @@ -0,0 +1,79 @@ +name: controlfreec_makegraph2 +description: Plot Freec output +keywords: + - cna + - cnv + - somatic + - single + - tumor-only +tools: + - controlfreec: + description: Copy number and genotype annotation from whole genome and whole exome + sequencing data. 
+ homepage: http://boevalab.inf.ethz.ch/FREEC + documentation: http://boevalab.inf.ethz.ch/FREEC/tutorial.html + tool_dev_url: https://github.com/BoevaLab/FREEC/ + doi: "10.1093/bioinformatics/btq635" + licence: ["GPL >=2"] + identifier: "" + +input: + # Only when we have meta + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ratio: + type: file + description: ratio file generated by FREEC + pattern: "*.ratio.txt" + ontologies: [] + - baf: + type: file + description: .BAF file generated by FREEC + pattern: "*.BAF" + ontologies: [] +output: + png_baf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_BAF.png": + type: file + description: Image of BAF plot + pattern: "*_BAF.png" + ontologies: [] + png_ratio_log2: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_ratio.log2.png": + type: file + description: Image of ratio log2 plot + pattern: "*_ratio.log2.png" + ontologies: [] + png_ratio: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*_ratio.png": + type: file + description: Image of ratio plot + pattern: "*_ratio.png" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@FriederikeHanssen" diff --git a/modules/nf-core/deepvariant/rundeepvariant/main.nf b/modules/nf-core/deepvariant/rundeepvariant/main.nf new file mode 100644 index 0000000000..33da4675ed --- /dev/null +++ b/modules/nf-core/deepvariant/rundeepvariant/main.nf @@ -0,0 +1,73 @@ +process DEEPVARIANT_RUNDEEPVARIANT { + tag "$meta.id" + label 'process_high' + + // FIXME Conda is not supported at the moment + // https://github.com/bioconda/bioconda-recipes/pull/45214#issuecomment-1890937836 + // BUG https://github.com/nf-core/modules/issues/1754 + // BUG https://github.com/bioconda/bioconda-recipes/issues/30310 + container "docker.io/google/deepvariant:1.9.0" + + input: + tuple val(meta), path(input), path(index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(gzi) + tuple val(meta5), path(par_bed) + + output: + tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.{tbi,csi}") , emit: vcf_index + tuple val(meta), path("${prefix}.g.vcf.gz") , emit: gvcf + tuple val(meta), path("${prefix}.g.vcf.gz.{tbi,csi}") , emit: gvcf_index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def regions = intervals ? "--regions=${intervals}" : "" + def par_regions = par_bed ? 
"--par_regions_bed=${par_bed}" : "" + + """ + /opt/deepvariant/bin/run_deepvariant \\ + --ref=${fasta} \\ + --reads=${input} \\ + --output_vcf=${prefix}.vcf.gz \\ + --output_gvcf=${prefix}.g.vcf.gz \\ + ${args} \\ + ${regions} \\ + ${par_regions} \\ + --intermediate_results_dir=tmp \\ + --num_shards=${task.cpus} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deepvariant: \$(echo \$(/opt/deepvariant/bin/run_deepvariant --version) | sed 's/^.*version //; s/ .*\$//' ) + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + echo "" | gzip > ${prefix}.g.vcf.gz + touch ${prefix}.g.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deepvariant: \$(echo \$(/opt/deepvariant/bin/run_deepvariant --version) | sed 's/^.*version //; s/ .*\$//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/deepvariant/rundeepvariant/meta.yml b/modules/nf-core/deepvariant/rundeepvariant/meta.yml new file mode 100644 index 0000000000..2c3ba0ad41 --- /dev/null +++ b/modules/nf-core/deepvariant/rundeepvariant/meta.yml @@ -0,0 +1,137 @@ +name: deepvariant_rundeepvariant +description: DeepVariant is an analysis pipeline that uses a deep neural network to + call genetic variants from next-generation DNA sequencing data +keywords: + - variant calling + - machine learning + - neural network +tools: + - deepvariant: + description: DeepVariant is an analysis pipeline that uses a deep neural network + to call genetic variants from next-generation DNA sequencing data + homepage: https://github.com/google/deepvariant + documentation: https://github.com/google/deepvariant + tool_dev_url: 
https://github.com/google/deepvariant + doi: "10.1038/nbt.4235" + licence: ["BSD-3-clause"] + identifier: biotools:deepvariant +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file + pattern: "*.bam/cram" + ontologies: [] + - index: + type: file + description: Index of BAM/CRAM file + pattern: "*.bai/crai" + ontologies: [] + - intervals: + type: file + description: file containing intervals + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - gzi: + type: file + description: GZI index of reference fasta file + pattern: "*.gzi" + ontologies: [] + - - meta5: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - par_bed: + type: file + description: BED file containing PAR regions + pattern: "*.bed" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.vcf.gz: + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + vcf_index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.vcf.gz.{tbi,csi}: + type: file + description: Tabix index file of compressed VCF + pattern: "*.vcf.gz.{tbi,csi}" + ontologies: [] + gvcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.g.vcf.gz: + type: file + description: Compressed GVCF file + pattern: "*.g.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + gvcf_index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.g.vcf.gz.{tbi,csi}: + type: file + description: Tabix index file of compressed GVCF + pattern: "*.g.vcf.gz.{tbi,csi}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@abhi18av" + - "@ramprasadn" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/dragmap/align/environment.yml b/modules/nf-core/dragmap/align/environment.yml new file mode 100644 index 0000000000..198f3fa726 --- /dev/null +++ b/modules/nf-core/dragmap/align/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # WARN: Do not update this tool to 1.3.0 until https://github.com/Illumina/DRAGMAP/issues/47 is resolved + - bioconda::dragmap=1.2.1 + - bioconda::samtools=1.19.2 + - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/dragmap/align/main.nf b/modules/nf-core/dragmap/align/main.nf new file mode 100644 index 0000000000..3f6ea75366 --- /dev/null +++ b/modules/nf-core/dragmap/align/main.nf @@ -0,0 +1,90 @@ +process DRAGMAP_ALIGN { + tag "${meta.id}" + label 'process_high' + + conda "${moduleDir}/environment.yml" + // WARN: Do not 
update this tool to 1.3.0 until https://github.com/Illumina/DRAGMAP/issues/47 is resolved + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:df80ed8d23d0a2c43181a2b3dd1b39f2d00fab5c-0' + : 'biocontainers/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:df80ed8d23d0a2c43181a2b3dd1b39f2d00fab5c-0'}" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(hashmap) + tuple val(meta3), path(fasta) + val sort_bam + + output: + tuple val(meta), path("*.sam"), emit: sam, optional: true + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val(meta), path('*.log'), emit: log + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reads_command = meta.single_end ? "-1 ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" + def samtools_command = sort_bam ? 'sort' : 'view' + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension == "cram" ? 
"--reference ${fasta}" : "" + if (!fasta && extension == "cram") { + error("Fasta reference is required for CRAM output") + } + + """ + dragen-os \\ + -r ${hashmap} \\ + ${args} \\ + --num-threads ${task.cpus} \\ + ${reads_command} \\ + 2> >(tee ${prefix}.dragmap.log >&2) \\ + | samtools ${samtools_command} ${args2} --threads ${task.cpus} ${reference} -o ${prefix}.${extension} - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dragmap: \$(echo \$(dragen-os --version 2>&1)) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + if (!fasta && extension == "cram") { + error("Fasta reference is required for CRAM output") + } + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } + else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } + + """ + touch ${prefix}.${extension} + ${create_index} + touch ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dragmap: \$(echo \$(dragen-os --version 2>&1)) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/dragmap/align/meta.yml b/modules/nf-core/dragmap/align/meta.yml new file mode 100644 index 0000000000..80f020f58f --- /dev/null +++ b/modules/nf-core/dragmap/align/meta.yml @@ -0,0 +1,117 @@ +name: dragmap_align +description: Performs fastq alignment to a reference using DRAGMAP +keywords: + - alignment + - map + - fastq + - bam + - sam +tools: + - dragmap: + description: Dragmap is 
the Dragen mapper/aligner Open Source Software. + homepage: https://github.com/Illumina/dragmap + documentation: https://github.com/Illumina/dragmap + tool_dev_url: https://github.com/Illumina/dragmap#basic-command-line-usage + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - hashmap: + type: file + description: DRAGMAP hash table + pattern: "Directory containing DRAGMAP hash table *.{cmp,.bin,.txt}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] + - fasta: + type: file + description: Genome fasta reference files + pattern: "*.{fa,fasta,fna}" + - - sort_bam: + type: boolean + description: Sort the BAM file +output: + - sam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.sam": + type: file + description: Output SAM file containing read alignments + pattern: "*.{sam}" + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information, e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Output CRAM file containing read alignments + pattern: "*.{cram}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*.crai": + type: file + description: Index file for CRAM file + pattern: "*.{crai}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Index file for BAM file + pattern: "*.{csi}" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: Log file + pattern: "*.{log}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" +maintainers: + - "@edmundmiller" diff --git a/modules/nf-core/dragmap/hashtable/environment.yml b/modules/nf-core/dragmap/hashtable/environment.yml new file mode 100644 index 0000000000..8225f820ca --- /dev/null +++ b/modules/nf-core/dragmap/hashtable/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # WARN: Do not update this tool to 1.3.0 until https://github.com/Illumina/DRAGMAP/issues/47 is resolved + - bioconda::dragmap=1.2.1 diff --git a/modules/nf-core/dragmap/hashtable/main.nf b/modules/nf-core/dragmap/hashtable/main.nf new file mode 100644 index 0000000000..e86b110094 --- /dev/null +++ b/modules/nf-core/dragmap/hashtable/main.nf @@ -0,0 +1,47 @@ +process DRAGMAP_HASHTABLE { + tag "${fasta}" + label 'process_high' + + conda "${moduleDir}/environment.yml" + // WARN: Do not update this tool to 1.3.0 until https://github.com/Illumina/DRAGMAP/issues/47 is resolved + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ?
'https://depot.galaxyproject.org/singularity/dragmap:1.2.1--h72d16da_1' + : 'biocontainers/dragmap:1.2.1--h72d16da_1'}" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("dragmap"), emit: hashmap + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir dragmap + dragen-os \\ + --build-hash-table true \\ + --ht-reference ${fasta} \\ + --output-directory dragmap \\ + ${args} \\ + --ht-num-threads ${task.cpus} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dragmap: \$(echo \$(dragen-os --version 2>&1)) + END_VERSIONS + """ + + stub: + """ + mkdir dragmap + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dragmap: \$(echo \$(dragen-os --version 2>&1)) + END_VERSIONS + """ +} diff --git a/modules/nf-core/dragmap/hashtable/meta.yml b/modules/nf-core/dragmap/hashtable/meta.yml new file mode 100644 index 0000000000..c6367b4739 --- /dev/null +++ b/modules/nf-core/dragmap/hashtable/meta.yml @@ -0,0 +1,48 @@ +name: dragmap_hashtable +description: Create DRAGEN hashtable for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - dragmap: + description: Dragmap is the Dragen mapper/aligner Open Source Software. + homepage: https://github.com/Illumina/dragmap + documentation: https://github.com/Illumina/dragmap + tool_dev_url: https://github.com/Illumina/dragmap#basic-command-line-usage + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file + ontologies: [] +output: + hashmap: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test', single_end:false ]
+      - dragmap:
+          type: file
+          description: DRAGMAP hash table
+          pattern: "*.{cmp,.bin,.txt}"
+          ontologies: []
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@edmundmiller"
+maintainers:
+  - "@edmundmiller"
diff --git a/modules/nf-core/ensemblvep/download/environment.yml b/modules/nf-core/ensemblvep/download/environment.yml
new file mode 100644
index 0000000000..7e60f7f954
--- /dev/null
+++ b/modules/nf-core/ensemblvep/download/environment.yml
@@ -0,0 +1,8 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::ensembl-vep=115.2
+  - bioconda::perl-math-cdf=0.1
diff --git a/modules/nf-core/ensemblvep/download/main.nf b/modules/nf-core/ensemblvep/download/main.nf
new file mode 100644
index 0000000000..7e1aeefed2
--- /dev/null
+++ b/modules/nf-core/ensemblvep/download/main.nf
@@ -0,0 +1,38 @@
+process ENSEMBLVEP_DOWNLOAD { // Runs `vep_install` to populate a cache directory named by `prefix`
+    tag "${meta.id}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/3d/3da6e21cbf9803529421d7e136d1ebec5ff71ec50e0d996eda2ce11ec2c19bf9/data'
+        : 'community.wave.seqera.io/library/ensembl-vep_perl-math-cdf:1e13f65f931a6954'}"
+
+    input:
+    tuple val(meta), val(assembly), val(species), val(cache_version) // plain values; the last three are passed to vep_install below
+
+    output:
+    tuple val(meta), path(prefix), emit: cache // the path named by `prefix` (default 'vep_cache')
+    tuple val("${task.process}"), val('ensemblvep'), eval("vep --help | sed -n '/ensembl-vep/s/.*: //p'"), topic: versions, emit: versions_ensemblvep
+    tuple val("${task.process}"), val('perl-math-cdf'), eval("perl -MMath::CDF -e 'print \$Math::CDF::VERSION'"), topic: versions, emit: versions_perlmathcdf
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // extra vep_install options, appended after the fixed ones
+    prefix = task.ext.prefix ?: 'vep_cache' // assigned without `def`; path(prefix) in the output block refers to this name
+    """
+    vep_install \\
+        --CACHEDIR ${prefix} \\
+        --SPECIES ${species} \\
+        --ASSEMBLY ${assembly} \\
+        --CACHE_VERSION ${cache_version} \\
+        ${args}
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: 'vep_cache' // same default as the script block, so the stub directory matches path(prefix)
+    """
+    mkdir ${prefix}
+    """
+}
diff --git a/modules/nf-core/ensemblvep/download/meta.yml b/modules/nf-core/ensemblvep/download/meta.yml
new file mode 100644
index 0000000000..ae1b9c6fa3
--- /dev/null
+++ b/modules/nf-core/ensemblvep/download/meta.yml
@@ -0,0 +1,91 @@
+name: ensemblvep_download
+description: Ensembl Variant Effect Predictor (VEP). The cache downloading
+  options are controlled through `task.ext.args`.
+keywords:
+  - annotation
+  - cache
+  - download
+tools:
+  - ensemblvep:
+      description: |
+        VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs
+        or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions.
+      homepage: https://www.ensembl.org/info/docs/tools/vep/index.html
+      documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html
+      licence:
+        - "Apache-2.0"
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. 
[ id:'test', single_end:false ] + - assembly: + type: string + description: | + Genome assembly + - species: + type: string + description: | + Specie + - cache_version: + type: string + description: | + cache version +output: + cache: + - - meta: + type: file + description: cache + pattern: "*" + ontologies: [] + - prefix: + type: file + description: cache + pattern: "*" + ontologies: [] + versions_ensemblvep: + - - ${task.process}: + type: string + description: The process the versions were collected from + - ensemblvep: + type: string + description: The tool name + - "vep --help | sed -n '/ensembl-vep/s/.*: //p'": + type: eval + description: The command used to generate the version of the tool + versions_perlmathcdf: + - - ${task.process}: + type: string + description: The process the versions were collected from + - perl-math-cdf: + type: string + description: The name of the tool + - perl -MMath::CDF -e 'print \$Math::CDF::VERSION': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - ensemblvep: + type: string + description: The tool name + - "vep --help | sed -n '/ensembl-vep/s/.*: //p'": + type: eval + description: The command used to generate the version of the tool + - - ${task.process}: + type: string + description: The process the versions were collected from + - perl-math-cdf: + type: string + description: The name of the tool + - perl -MMath::CDF -e 'print \$Math::CDF::VERSION': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/ensemblvep/vep/environment.yml b/modules/nf-core/ensemblvep/vep/environment.yml new file mode 100644 index 0000000000..7e60f7f954 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::ensembl-vep=115.2
+  - bioconda::perl-math-cdf=0.1
diff --git a/modules/nf-core/ensemblvep/vep/main.nf b/modules/nf-core/ensemblvep/vep/main.nf
new file mode 100644
index 0000000000..6c6da53d2d
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/main.nf
@@ -0,0 +1,74 @@
+// Annotate a VCF with Ensembl VEP; the output format (vcf/json/tab) is selected through `task.ext.args`.
+process ENSEMBLVEP_VEP {
+    tag "${meta.id}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/3d/3da6e21cbf9803529421d7e136d1ebec5ff71ec50e0d996eda2ce11ec2c19bf9/data'
+        : 'community.wave.seqera.io/library/ensembl-vep_perl-math-cdf:1e13f65f931a6954'}"
+
+    input:
+    tuple val(meta), path(vcf), path(custom_extra_files)
+    val genome
+    val species
+    val cache_version
+    path cache
+    tuple val(meta2), path(fasta)
+    path extra_files
+
+    output:
+    // The format-specific outputs are optional: the command writes a single ${prefix}.<ext>.gz file.
+    tuple val(meta), path("*.vcf.gz"), emit: vcf, optional: true
+    tuple val(meta), path("*.vcf.gz.tbi"), emit: tbi, optional: true
+    tuple val(meta), path("*.tab.gz"), emit: tab, optional: true
+    tuple val(meta), path("*.json.gz"), emit: json, optional: true
+    tuple val(meta), val("${task.process}"), val('ensemblvep'), path("*.html"), topic: multiqc_files, emit: report, optional: true
+    tuple val("${task.process}"), val('ensemblvep'), eval("vep --help | sed -n '/ensembl-vep/s/.*: //p'"), topic: versions, emit: versions_ensemblvep
+    tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'"), topic: versions, emit: versions_tabix
+    tuple val("${task.process}"), val('perl-math-cdf'), eval("perl -MMath::CDF -e 'print \\\$Math::CDF::VERSION'"), topic: versions, emit: versions_perlmathcdf
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    // The output extension follows the format flag found in args and defaults to vcf.
+    def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json") ? 'json' : args.contains("--tab") ? 'tab' : 'vcf'
+    // Request bgzip compression unless the caller already passed --compress_output.
+    def compress_cmd = args.contains("--compress_output") ? '' : '--compress_output bgzip'
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep"
+    def reference = fasta ? "--fasta ${fasta}" : ""
+    // Only VCF output is indexed with tabix (extra tabix options come from ext.args2).
+    def create_index = file_extension == "vcf" ? "tabix ${args2} ${prefix}.${file_extension}.gz" : ""
+    """
+    vep \\
+        -i ${vcf} \\
+        -o ${prefix}.${file_extension}.gz \\
+        ${args} \\
+        ${compress_cmd} \\
+        ${reference} \\
+        --assembly ${genome} \\
+        --species ${species} \\
+        --cache \\
+        --cache_version ${cache_version} \\
+        --dir_cache ${dir_cache} \\
+        --fork ${task.cpus}
+
+    ${create_index}
+    """
+
+    stub:
+    // `args` has to be defined here as well: the `def` in the script block is not visible in the stub block.
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json") ? 'json' : args.contains("--tab") ? 'tab' : 'vcf'
+    def create_index = file_extension == "vcf" ? "touch ${prefix}.${file_extension}.gz.tbi" : ""
+    """
+    echo "" | gzip > ${prefix}.${file_extension}.gz
+    ${create_index}
+    touch ${prefix}_summary.html
+    """
+}
diff --git a/modules/nf-core/ensemblvep/vep/meta.yml b/modules/nf-core/ensemblvep/vep/meta.yml
new file mode 100644
index 0000000000..fc84e3764e
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/meta.yml
@@ -0,0 +1,215 @@
+name: ensemblvep_vep
+description: Ensembl Variant Effect Predictor (VEP). The output-file-format is
+  controlled through `task.ext.args`.
+keywords:
+  - annotation
+  - vcf
+  - json
+  - tab
+tools:
+  - ensemblvep:
+      description: |
+        VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs
+        or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions.
+ homepage: https://www.ensembl.org/info/docs/tools/vep/index.html + documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html + licence: + - "Apache-2.0" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + vcf to annotate + ontologies: [] + - custom_extra_files: + type: file + description: | + extra sample-specific files to be used with the `--custom` flag to be configured with ext.args + (optional) + ontologies: [] + - genome: + type: string + description: | + which genome to annotate with + - species: + type: string + description: | + which species to annotate with + - cache_version: + type: integer + description: | + which version of the cache to annotate with + - cache: + type: file + description: | + path to VEP cache (optional) + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing fasta reference information + e.g. 
[ id:'test' ] + - fasta: + type: file + description: | + reference FASTA file (optional) + pattern: "*.{fasta,fa}" + ontologies: [] + - extra_files: + type: file + description: | + path to file(s) needed for plugins (optional) + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Map with sample information + - "*.vcf.gz": + type: file + description: | + annotated vcf (optional) + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 + tbi: + - - meta: + type: map + description: | + Map with sample information + - "*.vcf.gz.tbi": + type: file + description: | + annotated vcf index (optional) + pattern: "*.vcf.gz.tbi" + ontologies: [] + tab: + - - meta: + type: map + description: | + Map with sample information + - "*.tab.gz": + type: file + description: | + tab file with annotated variants (optional) + pattern: "*.ann.tab.gz" + ontologies: + - edam: http://edamontology.org/format_3989 + json: + - - meta: + type: map + description: | + Map with sample information + - "*.json.gz": + type: file + description: | + json file with annotated variants (optional) + pattern: "*.ann.json.gz" + ontologies: + - edam: http://edamontology.org/format_3989 + report: + - - meta: + type: map + description: | + Map with sample information + - ${task.process}: + type: string + description: The process + - ensemblvep: + type: string + description: The tool name + - "*.html": + type: file + description: VEP report file + pattern: "*.html" + ontologies: [] + versions_ensemblvep: + - - ${task.process}: + type: string + description: The process + - ensemblvep: + type: string + description: The tool name + - "vep --help | sed -n '/ensembl-vep/s/.*: //p'": + type: eval + description: The command used to generate the version of the tool + versions_tabix: + - - ${task.process}: + type: string + description: The process + - tabix: + type: string + description: The tool name + - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+': + type: eval + 
description: The expression to obtain the version of the tool + versions_perlmathcdf: + - - ${task.process}: + type: string + description: The process + - perl-math-cdf: + type: string + description: The tool name + - perl -MMath::CDF -e 'print \\$Math::CDF::VERSION': + type: eval + description: The expression to obtain the version of the tool +topics: + multiqc_files: + - - meta: + type: string + description: | + Map with sample information + - ${task.process}: + type: string + description: The process + - ensemblvep: + type: string + description: The tool name + - "*.html": + type: file + description: VEP report file + pattern: "*.html" + ontologies: [] + versions: + - - ${task.process}: + type: string + description: The process + - ensemblvep: + type: string + description: The tool name + - "vep --help | sed -n '/ensembl-vep/s/.*: //p'": + type: eval + description: The command used to generate the version of the tool + - - ${task.process}: + type: string + description: The process + - tabix: + type: string + description: The tool name + - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+': + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The process + - perl-math-cdf: + type: string + description: The tool name + - perl -MMath::CDF -e 'print \\$Math::CDF::VERSION': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@maxulysse" + - "@matthdsm" + - "@nvnieuwk" +maintainers: + - "@maxulysse" + - "@matthdsm" + - "@nvnieuwk" diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml new file mode 100644 index 0000000000..90adcd2c52 --- /dev/null +++ b/modules/nf-core/fastp/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::fastp=0.24.0 diff --git 
a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf
new file mode 100644
index 0000000000..1342741d53
--- /dev/null
+++ b/modules/nf-core/fastp/main.nf
@@ -0,0 +1,129 @@
+// Adapter/quality trimming with fastp; handles interleaved, single-end and paired-end input.
+process FASTP {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/88/889a182b8066804f4799f3808a5813ad601381a8a0e3baa4ab8d73e739b97001/data' :
+        'community.wave.seqera.io/library/fastp:0.24.0--62c97b06e8447690' }"
+
+    input:
+    tuple val(meta), path(reads)
+    path adapter_fasta
+    val discard_trimmed_pass
+    val save_trimmed_fail
+    val save_merged
+
+    output:
+    tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads
+    tuple val(meta), path('*.json') , emit: json
+    tuple val(meta), path('*.html') , emit: html
+    tuple val(meta), path('*.log') , emit: log
+    tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail
+    tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : ""
+    // Failed reads: a single file for single-end data; one paired plus two unpaired files for paired-end data.
+    def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
+    // Explicit ternary rather than `?:`: with the Elvis operator a true discard_trimmed_pass
+    // would itself be interpolated into the fastp command line as the stray word "true".
+    def out_fq1 = discard_trimmed_pass ? '' : ( meta.single_end ? "--out1 ${prefix}.fastp.fastq.gz" : "--out1 ${prefix}_1.fastp.fastq.gz" )
+    def out_fq2 = discard_trimmed_pass ? '' : "--out2 ${prefix}_2.fastp.fastq.gz"
+    // Added soft-links to original fastqs for consistent naming in MultiQC
+    // Use single ended for interleaved. Add --interleaved_in in config.
+    if ( task.ext.args?.contains('--interleaved_in') ) {
+        """
+        [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
+
+        fastp \\
+            --stdout \\
+            --in1 ${prefix}.fastq.gz \\
+            --thread $task.cpus \\
+            --json ${prefix}.fastp.json \\
+            --html ${prefix}.fastp.html \\
+            $adapter_list \\
+            $fail_fastq \\
+            $args \\
+            2> >(tee ${prefix}.fastp.log >&2) \\
+        | gzip -c > ${prefix}.fastp.fastq.gz
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+        END_VERSIONS
+        """
+    } else if (meta.single_end) {
+        """
+        [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
+
+        fastp \\
+            --in1 ${prefix}.fastq.gz \\
+            $out_fq1 \\
+            --thread $task.cpus \\
+            --json ${prefix}.fastp.json \\
+            --html ${prefix}.fastp.html \\
+            $adapter_list \\
+            $fail_fastq \\
+            $args \\
+            2> >(tee ${prefix}.fastp.log >&2)
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+        END_VERSIONS
+        """
+    } else {
+        def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : ''
+        """
+        [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz
+        [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz
+        fastp \\
+            --in1 ${prefix}_1.fastq.gz \\
+            --in2 ${prefix}_2.fastq.gz \\
+            $out_fq1 \\
+            $out_fq2 \\
+            --json ${prefix}.fastp.json \\
+            --html ${prefix}.fastp.html \\
+            $adapter_list \\
+            $fail_fastq \\
+            $merge_fastq \\
+            --thread $task.cpus \\
+            --detect_adapter_for_pe \\
+            $args \\
+            2> >(tee ${prefix}.fastp.log >&2)
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+        END_VERSIONS
+        """
+    }
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end
+    def touch_reads = (discard_trimmed_pass) ? "" : (is_single_output) ? "echo '' | gzip > ${prefix}.fastp.fastq.gz" : "echo '' | gzip > ${prefix}_1.fastp.fastq.gz ; echo '' | gzip > ${prefix}_2.fastp.fastq.gz"
+    def touch_merged = (!is_single_output && save_merged) ? "echo '' | gzip > ${prefix}.merged.fastq.gz" : ""
+    def touch_fail_fastq = (!save_trimmed_fail) ? "" : meta.single_end ? "echo '' | gzip > ${prefix}.fail.fastq.gz" : "echo '' | gzip > ${prefix}.paired.fail.fastq.gz ; echo '' | gzip > ${prefix}_1.fail.fastq.gz ; echo '' | gzip > ${prefix}_2.fail.fastq.gz"
+    """
+    $touch_reads
+    $touch_fail_fastq
+    $touch_merged
+    touch "${prefix}.fastp.json"
+    touch "${prefix}.fastp.html"
+    touch "${prefix}.fastp.log"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml
new file mode 100644
index 0000000000..9c4b245844
--- /dev/null
+++ b/modules/nf-core/fastp/meta.yml
@@ -0,0 +1,114 @@
+name: fastp
+description: Perform adapter/quality trimming on sequencing reads
+keywords:
+  - trimming
+  - quality control
+  - fastq
+tools:
+  - fastp:
+      description: |
+        A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance.
+      documentation: https://github.com/OpenGene/fastp
+      doi: 10.1093/bioinformatics/bty560
+      licence: ["MIT"]
+      identifier: biotools:fastp
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads.
+          e.g. [ id:'test', single_end:false ]
+    - reads:
+        type: file
+        description: |
+          List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+          respectively. If you wish to run interleaved paired-end data, supply as single-end data
+          but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module.
+ - - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. + pattern: "*.{fasta,fna,fas,fa}" + - - discard_trimmed_pass: + type: boolean + description: | + Specify true to not write any reads that pass trimming thresholds. + This can be used to use fastp for the output report only. + - - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds + ending in `*.fail.fastq.gz` + - - save_merged: + type: boolean + description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastp.fastq.gz": + type: file + description: The trimmed/modified/unmerged fastq reads + pattern: "*fastp.fastq.gz" + - json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.json": + type: file + description: Results in JSON format + pattern: "*.json" + - html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: Results in HTML format + pattern: "*.html" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: fastq log file + pattern: "*.log" + - reads_fail: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fail.fastq.gz": + type: file + description: Reads the failed the preprocessing + pattern: "*fail.fastq.gz" + - reads_merged: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ]
+      - "*.merged.fastq.gz":
+          type: file
+          description: Reads that were successfully merged
+          pattern: "*.{merged.fastq.gz}"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@drpatelh"
+  - "@kevinmenden"
+maintainers:
+  - "@drpatelh"
+  - "@kevinmenden"
diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml
new file mode 100644
index 0000000000..f9f54ee9b4
--- /dev/null
+++ b/modules/nf-core/fastqc/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::fastqc=0.12.1
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
new file mode 100644
index 0000000000..23e16634c3
--- /dev/null
+++ b/modules/nf-core/fastqc/main.nf
@@ -0,0 +1,67 @@
+// Run FastQC on the input reads after soft-linking them to prefix-based names.
+process FASTQC {
+    tag "${meta.id}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' :
+        'biocontainers/fastqc:0.12.1--hdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.html"), emit: html
+    tuple val(meta), path("*.zip") , emit: zip
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Make list of old name and new name pairs to use for renaming in the bash while loop
+    def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] }
+    def rename_to = old_new_pairs*.join(' ').join(' ')
+    def renamed_files = old_new_pairs.collect{ _old_name, new_name -> new_name }.join(' ')
+
+    // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory)
+    // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222
+    // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label
+    // Integer division: Groovy's `/` on two integers can yield a fractional BigDecimal,
+    // which would then be rendered into --memory as a non-integer value.
+    def memory_in_mb = task.memory ? task.memory.toUnit('MB').intdiv(task.cpus) : null
+    // FastQC memory value allowed range (100 - 10000)
+    def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb)
+
+    """
+    printf "%s %s\\n" ${rename_to} | while read old_name new_name; do
+        [ -f "\${new_name}" ] || ln -s \$old_name \$new_name
+    done
+
+    fastqc \\
+        ${args} \\
+        --threads ${task.cpus} \\
+        --memory ${fastqc_memory} \\
+        ${renamed_files}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.html
+    touch ${prefix}.zip
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml
new file mode 100644
index 0000000000..c8d9d025ac
--- /dev/null
+++ b/modules/nf-core/fastqc/meta.yml
@@ -0,0 +1,72 @@
+name: fastqc
+description: Run FastQC on sequenced reads
+keywords:
+  - quality control
+  - qc
+  - adapters
+  - fastq
+tools:
+  - fastqc:
+      description: |
+        FastQC gives general quality metrics about your reads.
+ It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). + + You get information about adapter contamination and other + overrepresented sequences. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ + licence: ["GPL-2.0-only"] + identifier: biotools:fastqc +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + ontologies: [] +output: + html: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + ontologies: [] + zip: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ]
+      - "*.zip":
+          type: file
+          description: FastQC report archive
+          pattern: "*_{fastqc.zip}"
+          ontologies: []
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@drpatelh"
+  - "@grst"
+  - "@ewels"
+  - "@FelixKrueger"
+maintainers:
+  - "@drpatelh"
+  - "@grst"
+  - "@ewels"
+  - "@FelixKrueger"
diff --git a/modules/nf-core/fgbio/callmolecularconsensusreads/environment.yml b/modules/nf-core/fgbio/callmolecularconsensusreads/environment.yml
new file mode 100644
index 0000000000..4ebc0924d7
--- /dev/null
+++ b/modules/nf-core/fgbio/callmolecularconsensusreads/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::fgbio=2.4.0
diff --git a/modules/nf-core/fgbio/callmolecularconsensusreads/main.nf b/modules/nf-core/fgbio/callmolecularconsensusreads/main.nf
new file mode 100644
index 0000000000..7d2e660d02
--- /dev/null
+++ b/modules/nf-core/fgbio/callmolecularconsensusreads/main.nf
@@ -0,0 +1,70 @@
+// Build consensus reads from a UMI-grouped BAM with fgbio CallMolecularConsensusReads.
+process FGBIO_CALLMOLECULARCONSENSUSREADS {
+    tag "${meta.id}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/87/87626ef674e2f19366ae6214575a114fe80ce598e796894820550731706a84be/data' :
+        'community.wave.seqera.io/library/fgbio:2.4.0--913bad9d47ff8ddc' }"
+
+    input:
+    tuple val(meta), path(grouped_bam)
+    val min_reads
+    val min_baseq
+
+    output:
+    tuple val(meta), path("*.bam"), emit: bam
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped"
+    // JVM heap in GB: the task's memory when it is set, otherwise 8.
+    def mem_gb = task.memory ? task.memory.giga : 8
+    if (!task.memory) {
+        log.info '[fgbio CallMolecularConsensusReads] Available memory not known - defaulting to 8GB. Specify process memory requirements to change this.'
+    }
+    // Fail early when the output name equals the input file name.
+    if ("${grouped_bam}" == "${prefix}.bam") {
+        error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    }
+    """
+    fgbio \\
+        -Xmx${mem_gb}g \\
+        --tmp-dir=. \\
+        --async-io=true \\
+        --compression=1 \\
+        CallMolecularConsensusReads \\
+        --input ${grouped_bam} \\
+        --output ${prefix}.bam \\
+        --min-reads ${min_reads} \\
+        --min-input-base-quality ${min_baseq} \\
+        --threads ${task.cpus} \\
+        ${args};
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    // Same default output name and the same name-clash guard as the script block.
+    prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped"
+    if ("${grouped_bam}" == "${prefix}.bam") {
+        error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    }
+    """
+    touch ${prefix}.bam
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+    END_VERSIONS
+    """
+
+}
diff --git a/modules/nf-core/fgbio/callmolecularconsensusreads/meta.yml b/modules/nf-core/fgbio/callmolecularconsensusreads/meta.yml
new file mode 100644
index 0000000000..846c297b19
--- /dev/null
+++ b/modules/nf-core/fgbio/callmolecularconsensusreads/meta.yml
@@ -0,0 +1,51 @@
+name: fgbio_callmolecularconsensusreads
+description: Calls consensus sequences from reads with the same unique molecular tag.
+keywords:
+  - UMIs
+  - consensus sequence
+  - bam
+tools:
+  - fgbio:
+      description: Tools for working with genomic and high throughput sequencing data.
+      homepage: https://github.com/fulcrumgenomics/fgbio
+      documentation: http://fulcrumgenomics.github.io/fgbio/
+      licence: ["MIT"]
+      identifier: biotools:fgbio
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false, collapse:false ]
+    - grouped_bam:
+        type: file
+        description: |
+          The input SAM or BAM file, grouped by UMIs
+        pattern: "*.{bam,sam}"
+  - - min_reads:
+        type: integer
+        description: Minimum number of original reads to build each consensus read.
+  - - min_baseq:
+        type: integer
+        description: Ignore bases in raw reads that have Q below this value.
+output:
+  - bam:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.bam":
+          type: file
+          description: |
+            Output SAM or BAM file to write consensus reads.
+ pattern: "*.{bam,sam}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@sruthipsuresh" +maintainers: + - "@sruthipsuresh" diff --git a/modules/nf-core/fgbio/copyumifromreadname/environment.yml b/modules/nf-core/fgbio/copyumifromreadname/environment.yml new file mode 100644 index 0000000000..4ebc0924d7 --- /dev/null +++ b/modules/nf-core/fgbio/copyumifromreadname/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::fgbio=2.4.0 diff --git a/modules/nf-core/fgbio/copyumifromreadname/main.nf b/modules/nf-core/fgbio/copyumifromreadname/main.nf new file mode 100644 index 0000000000..b15c970af1 --- /dev/null +++ b/modules/nf-core/fgbio/copyumifromreadname/main.nf @@ -0,0 +1,64 @@ +process FGBIO_COPYUMIFROMREADNAME { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/87/87626ef674e2f19366ae6214575a114fe80ce598e796894820550731706a84be/data' : + 'community.wave.seqera.io/library/fgbio:2.4.0--913bad9d47ff8ddc' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.bai"), emit: bai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_umi_extracted" + def mem_gb = 8 + if (!task.memory) { + log.info '[fgbio CopyUmiFromReadName] Available memory not known - defaulting to 8GB. Specify process memory requirements to change this.' 
+ } else if (mem_gb > task.memory.giga) { + if (task.memory.giga < 2) { + mem_gb = 1 + } else { + mem_gb = task.memory.giga - 1 + } + } + """ + fgbio \\ + -Xmx${mem_gb}g \\ + --tmp-dir=. \\ + --async-io=true \\ + CopyUmiFromReadName \\ + ${args} \\ + --input ${bam} \\ + --output ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//') + END_VERSIONS + """ + + + stub: + def prefix = task.ext.prefix ?: "${meta.id}_umi_extracted" + """ + + touch ${prefix}.bam + touch ${prefix}.bai + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/fgbio/copyumifromreadname/meta.yml b/modules/nf-core/fgbio/copyumifromreadname/meta.yml new file mode 100644 index 0000000000..7e7b30f71d --- /dev/null +++ b/modules/nf-core/fgbio/copyumifromreadname/meta.yml @@ -0,0 +1,79 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "fgbio_copyumifromreadname" +description: Copies the UMI at the end of a BAM file's read name to the RX tag. +keywords: + - sort + - example + - genomics +tools: + - "fgbio": + description: "A set of tools for working with genomic and high throughput sequencing + data, including UMIs" + homepage: http://fulcrumgenomics.github.io/fgbio/ + documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/CopyUmiFromReadName.html + tool_dev_url: https://github.com/fulcrumgenomics/fgbio + licence: ["MIT"] + identifier: biotools:fgbio + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
`[ id:'sample1' ]` + + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + - edam: "http://edamontology.org/format_2573" # CRAM + - edam: "http://edamontology.org/format_3462" # SAM + + - bai: + type: file + description: Index for bam file + pattern: "*.{bai}" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + - edam: "http://edamontology.org/format_2573" # CRAM + - edam: "http://edamontology.org/format_3462" # SAM + +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.bam": + type: file + description: Sorted BAM file + pattern: "*.{bam}" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.bai": + type: file + description: Index for bam file + pattern: "*.{bai}" + ontologies: + - edam: "http://edamontology.org/format_3327" # BAI + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@sppearce" +maintainers: + - "@sppearce" diff --git a/modules/nf-core/fgbio/fastqtobam/environment.yml b/modules/nf-core/fgbio/fastqtobam/environment.yml new file mode 100644 index 0000000000..4ebc0924d7 --- /dev/null +++ b/modules/nf-core/fgbio/fastqtobam/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::fgbio=2.4.0 diff --git a/modules/nf-core/fgbio/fastqtobam/main.nf b/modules/nf-core/fgbio/fastqtobam/main.nf new file mode 100644 index 0000000000..b4223db0f8 --- /dev/null +++ b/modules/nf-core/fgbio/fastqtobam/main.nf @@ 
-0,0 +1,70 @@ +process FGBIO_FASTQTOBAM { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/87/87626ef674e2f19366ae6214575a114fe80ce598e796894820550731706a84be/data' : + 'community.wave.seqera.io/library/fgbio:2.4.0--913bad9d47ff8ddc' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.bam") , emit: bam , optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "bam" + def sample_name = args.contains("--sample") ? "" : "--sample ${prefix}" + def library_name = args.contains("--library") ? "" : "--library ${prefix}" + + def mem_gb = 8 + if (!task.memory) { + log.info '[fgbio FastqToBam] Available memory not known - defaulting to 8GB. Specify process memory requirements to change this.' + } else if (mem_gb > task.memory.giga) { + if (task.memory.giga < 2) { + mem_gb = 1 + } else { + mem_gb = task.memory.giga - 1 + } + } + + """ + fgbio \\ + -Xmx${mem_gb}g \\ + --tmp-dir=. 
\\ + --async-io=true \\ + FastqToBam \\ + ${args} \\ + --input ${reads} \\ + --output ${prefix}.${suffix} \\ + ${sample_name} \\ + ${library_name} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "bam" + + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/fgbio/fastqtobam/meta.yml b/modules/nf-core/fgbio/fastqtobam/meta.yml new file mode 100644 index 0000000000..bce76cf8c0 --- /dev/null +++ b/modules/nf-core/fgbio/fastqtobam/meta.yml @@ -0,0 +1,60 @@ +name: fgbio_fastqtobam +description: | + Using the fgbio tools, converts FASTQ files sequenced into unaligned BAM or CRAM files possibly moving the UMI barcode into the RX field of the reads +keywords: + - unaligned + - bam + - cram +tools: + - fgbio: + description: A set of tools for working with genomic and high throughput sequencing + data, including UMIs + homepage: http://fulcrumgenomics.github.io/fgbio/ + documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/ + tool_dev_url: https://github.com/fulcrumgenomics/fgbio + licence: ["MIT"] + identifier: biotools:fgbio +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: pair of reads to be converted into BAM file + pattern: "*.{fastq.gz}" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.bam": + type: file + description: Unaligned, unsorted BAM file + pattern: "*.{bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Unaligned, unsorted CRAM file + pattern: "*.{cram}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@lescai" + - "@matthdsm" + - "@nvnieuwk" +maintainers: + - "@lescai" + - "@matthdsm" + - "@nvnieuwk" diff --git a/modules/nf-core/fgbio/groupreadsbyumi/environment.yml b/modules/nf-core/fgbio/groupreadsbyumi/environment.yml new file mode 100644 index 0000000000..4ebc0924d7 --- /dev/null +++ b/modules/nf-core/fgbio/groupreadsbyumi/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::fgbio=2.4.0 diff --git a/modules/nf-core/fgbio/groupreadsbyumi/main.nf b/modules/nf-core/fgbio/groupreadsbyumi/main.nf new file mode 100644 index 0000000000..c0506c9022 --- /dev/null +++ b/modules/nf-core/fgbio/groupreadsbyumi/main.nf @@ -0,0 +1,67 @@ +process FGBIO_GROUPREADSBYUMI { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/87/87626ef674e2f19366ae6214575a114fe80ce598e796894820550731706a84be/data' : + 'community.wave.seqera.io/library/fgbio:2.4.0--913bad9d47ff8ddc' }" + + input: + tuple val(meta), path(bam) + val(strategy) + + output: + tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*histogram.txt"), emit: histogram + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_umi-grouped" + def mem_gb = 8 + if (!task.memory) { + log.info '[fgbio GroupReadsByUmi] Available memory not known - defaulting to 8GB. Specify process memory requirements to change this.' + } else if (mem_gb > task.memory.giga) { + if (task.memory.giga < 2) { + mem_gb = 1 + } else { + mem_gb = task.memory.giga - 1 + } + } + + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + + """ + fgbio \\ + -Xmx${mem_gb}g \\ + --tmp-dir=. \\ + GroupReadsByUmi \\ + -s $strategy \\ + $args \\ + -i $bam \\ + -o ${prefix}.bam \\ + -f ${prefix}_histogram.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}_umi-grouped" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """ + touch ${prefix}.bam + touch ${prefix}_histogram.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/fgbio/groupreadsbyumi/meta.yml b/modules/nf-core/fgbio/groupreadsbyumi/meta.yml new file mode 100644 index 0000000000..c6d588daf1 --- /dev/null +++ b/modules/nf-core/fgbio/groupreadsbyumi/meta.yml @@ -0,0 +1,68 @@ +name: fgbio_groupreadsbyumi +description: | + Groups reads together that appear to have come from the same original molecule. + Reads are grouped by template, and then templates are sorted by the 5’ mapping positions + of the reads from the template, used from earliest mapping position to latest. + Reads that have the same end positions are then sub-grouped by UMI sequence. + (!) Note: the MQ tag is required on reads with mapped mates (!) + This can be added using samblaster with the optional argument --addMateTags. +keywords: + - UMI + - groupreads + - fgbio +tools: + - fgbio: + description: A set of tools for working with genomic and high throughput sequencing + data, including UMIs + homepage: http://fulcrumgenomics.github.io/fgbio/ + documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/ + tool_dev_url: https://github.com/fulcrumgenomics/fgbio + licence: ["MIT"] + identifier: biotools:fgbio +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: | + BAM file. Note: the MQ tag is required on reads with mapped mates (!) + pattern: "*.bam" + - - strategy: + type: string + enum: ["Identity", "Edit", "Adjacency", "Paired"] + description: | + Required argument: defines the UMI assignment strategy. + Must be chosen among: Identity, Edit, Adjacency, Paired. +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.bam": + type: file + description: UMI-grouped BAM + pattern: "*.bam" + - histogram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*histogram.txt": + type: file + description: A text file containing the tag family size counts + pattern: "*.txt" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@lescai" +maintainers: + - "@lescai" diff --git a/modules/nf-core/freebayes/environment.yml b/modules/nf-core/freebayes/environment.yml new file mode 100644 index 0000000000..79cb3eadc8 --- /dev/null +++ b/modules/nf-core/freebayes/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::freebayes=1.3.10 diff --git a/modules/nf-core/freebayes/main.nf b/modules/nf-core/freebayes/main.nf new file mode 100644 index 0000000000..9d949add0b --- /dev/null +++ b/modules/nf-core/freebayes/main.nf @@ -0,0 +1,61 @@ +process FREEBAYES { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/freebayes:1.3.10--hbefcdb2_0' + : 'biocontainers/freebayes:1.3.10--hbefcdb2_0'}" + + input: + tuple val(meta), path(input_1), path(input_1_index), path(input_2), path(input_2_index), path(target_bed) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fasta_fai) + tuple val(meta4), path(samples) + tuple val(meta5), path(populations) + tuple val(meta6), path(cnv) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input = input_2 ? "${input_1} ${input_2}" : "${input_1}" + def targets_file = target_bed ? "--target ${target_bed}" : "" + def samples_file = samples ? "--samples ${samples}" : "" + def populations_file = populations ? "--populations ${populations}" : "" + def cnv_file = cnv ? "--cnv-map ${cnv}" : "" + """ + freebayes \\ + -f ${fasta} \\ + ${targets_file} \\ + ${samples_file} \\ + ${populations_file} \\ + ${cnv_file} \\ + ${args} \\ + ${input} > ${prefix}.vcf + + bgzip ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freebayes: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo | gzip > ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freebayes: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/freebayes/meta.yml b/modules/nf-core/freebayes/meta.yml new file mode 100644 index 0000000000..5593bcaaa9 --- /dev/null +++ b/modules/nf-core/freebayes/meta.yml @@ -0,0 +1,135 @@ +name: freebayes +description: A haplotype-based variant detector +keywords: + - variant caller + - SNP + - genotyping + - somatic variant calling + - germline variant calling + - bacterial variant calling + - bayesian 
+tools: + - freebayes: + description: Bayesian haplotype-based polymorphism discovery and genotyping + homepage: https://github.com/freebayes/freebayes + documentation: https://github.com/freebayes/freebayes + tool_dev_url: https://github.com/freebayes/freebayes + doi: "10.48550/arXiv.1207.3907" + licence: ["MIT"] + identifier: biotools:freebayes +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_1: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - input_1_index: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai}" + ontologies: [] + - input_2: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - input_2_index: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai}" + ontologies: [] + - target_bed: + type: file + description: Optional - Limit analysis to targets listed in this BED-format + FILE. + pattern: "*.bed" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test_reference' ] + - fasta: + type: file + description: reference fasta file + pattern: ".{fa,fa.gz,fasta,fasta.gz}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test_reference' ] + - fasta_fai: + type: file + description: reference fasta file index + pattern: "*.{fa,fasta}.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing meta information for the samples file. + e.g. [ id:'test_samples' ] + - samples: + type: file + description: Optional - Limit analysis to samples listed (one per line) in the + FILE. + pattern: "*.txt" + ontologies: [] + - - meta5: + type: map + description: | + Groovy Map containing meta information for the populations file. + e.g. 
[ id:'test_populations' ] + - populations: + type: file + description: Optional - Each line of FILE should list a sample and a population + which it is part of. + pattern: "*.txt" + ontologies: [] + - - meta6: + type: map + description: | + Groovy Map containing meta information for the cnv file. + e.g. [ id:'test_cnv' ] + - cnv: + type: file + description: | + A copy number map BED file, which has either a sample-level ploidy: + sample_name copy_number + or a region-specific format: + seq_name start end sample_name copy_number + pattern: "*.bed" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz": + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + versions: + - versions.yml: + type: file + description: File containing software version + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@maxibor" + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@maxibor" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/applybqsr/environment.yml b/modules/nf-core/gatk4/applybqsr/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/applybqsr/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/applybqsr/main.nf b/modules/nf-core/gatk4/applybqsr/main.nf new file mode 100644 index 0000000000..75b7be0594 --- /dev/null +++ b/modules/nf-core/gatk4/applybqsr/main.nf @@ -0,0 +1,72 @@ +process GATK4_APPLYBQSR { + tag "${meta.id}" + 
label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd'}" + + input: + tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) + path fasta + path fai + path dict + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}*bai"), emit: bai, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + // suffix can only be bam or cram, cram being the sensible default + def suffix = task.ext.suffix && task.ext.suffix == "bam" ? "bam" : "cram" + def interval_command = intervals ? "--intervals ${intervals}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + ApplyBQSR \\ + --input ${input} \\ + --output ${prefix}.${suffix} \\ + --reference ${fasta} \\ + --bqsr-recal-file ${bqsr_table} \\ + ${interval_command} \\ + --tmp-dir . 
\\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix && task.ext.suffix == "bam" ? "bam" : "cram" + """ + touch ${prefix}.${suffix} + if [[ ${suffix} == cram ]]; then + touch ${prefix}.cram.bai + else + touch ${prefix}.bai + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/applybqsr/meta.yml b/modules/nf-core/gatk4/applybqsr/meta.yml new file mode 100644 index 0000000000..6e7bedad71 --- /dev/null +++ b/modules/nf-core/gatk4/applybqsr/meta.yml @@ -0,0 +1,105 @@ +name: gatk4_applybqsr +description: Apply base quality score recalibration (BQSR) to a bam file +keywords: + - bam + - base quality score recalibration + - bqsr + - cram + - gatk4 +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + ontologies: [] + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + ontologies: [] + - bqsr_table: + type: file + description: Recalibration table from gatk4_baserecalibrator + ontologies: [] + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + ontologies: [] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + ontologies: [] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + ontologies: [] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + ontologies: [] +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: Recalibrated BAM file + pattern: "${prefix}.bam" + ontologies: [] + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}*bai: + type: file + description: Recalibrated BAM index file + pattern: "${prefix}*bai" + ontologies: [] + cram: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: Recalibrated CRAM file + pattern: "${prefix}.cram" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@yocra3" + - "@FriederikeHanssen" +maintainers: + - "@yocra3" + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/applyvqsr/environment.yml b/modules/nf-core/gatk4/applyvqsr/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/applyvqsr/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/applyvqsr/main.nf b/modules/nf-core/gatk4/applyvqsr/main.nf new file mode 100644 index 0000000000..c8ea3da5bf --- /dev/null +++ b/modules/nf-core/gatk4/applyvqsr/main.nf @@ -0,0 +1,63 @@ +process GATK4_APPLYVQSR { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(vcf), path(vcf_tbi), path(recal), path(recal_index), path(tranches) + path fasta + path fai + path dict + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference_command = fasta ? "--reference $fasta" : '' + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK ApplyVQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + ApplyVQSR \\ + --variant ${vcf} \\ + --output ${prefix}.vcf.gz \\ + $reference_command \\ + --tranches-file $tranches \\ + --recal-file $recal \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/applyvqsr/meta.yml b/modules/nf-core/gatk4/applyvqsr/meta.yml new file mode 100644 index 0000000000..ceedff621e --- /dev/null +++ b/modules/nf-core/gatk4/applyvqsr/meta.yml @@ -0,0 +1,91 @@ +name: gatk4_applyvqsr +description: | + Apply a score cutoff to filter variants based on a recalibration table. 
+ ApplyVQSR performs the second pass in a two-stage process called Variant Quality Score Recalibration (VQSR). + Specifically, it applies filtering to the input variants based on the recalibration table produced + in the first step by VariantRecalibrator and a target sensitivity value. +keywords: + - gatk4 + - variant quality score recalibration + - vcf + - vqsr +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: file + description: VCF file to be recalibrated, this should be the same file as used + for the first stage VariantRecalibrator. + pattern: "*.vcf" + - vcf_tbi: + type: file + description: tabix index for the input vcf file. + pattern: "*.vcf.tbi" + - recal: + type: file + description: Recalibration file produced when the input vcf was run through + VariantRecalibrator in stage 1. + pattern: "*.recal" + - recal_index: + type: file + description: Index file for the recalibration file. + pattern: ".recal.idx" + - tranches: + type: file + description: Tranches file produced when the input vcf was run through VariantRecalibrator + in stage 1.
+ pattern: ".tranches" + - - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - vcf: + - meta: + type: file + description: compressed vcf file containing the recalibrated variants. + pattern: "*.vcf.gz" + - "*.vcf.gz": + type: file + description: compressed vcf file containing the recalibrated variants. + pattern: "*.vcf.gz" + - tbi: + - meta: + type: file + description: Index of recalibrated vcf file. + pattern: "*vcf.gz.tbi" + - "*.tbi": + type: file + description: Index of recalibrated vcf file. + pattern: "*vcf.gz.tbi" + - versions: + - versions.yml: + type: file + description: File containing software versions. + pattern: "versions.yml" +authors: + - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/baserecalibrator/environment.yml b/modules/nf-core/gatk4/baserecalibrator/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/baserecalibrator/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/baserecalibrator/main.nf b/modules/nf-core/gatk4/baserecalibrator/main.nf new file mode 100644 index 0000000000..493533c726 --- /dev/null +++ b/modules/nf-core/gatk4/baserecalibrator/main.nf @@ -0,0 +1,63 @@ +process GATK4_BASERECALIBRATOR { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(input), path(input_index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + tuple val(meta5), path(known_sites) + tuple val(meta6), path(known_sites_tbi) + + output: + tuple val(meta), path("*.table"), emit: table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval_command = intervals ? "--intervals $intervals" : "" + def sites_command = known_sites.collect{"--known-sites $it"}.join(' ') + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + BaseRecalibrator \\ + --input $input \\ + --output ${prefix}.table \\ + --reference $fasta \\ + $interval_command \\ + $sites_command \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.table + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/baserecalibrator/meta.yml b/modules/nf-core/gatk4/baserecalibrator/meta.yml new file mode 100644 index 0000000000..c3caeb8084 --- /dev/null +++ b/modules/nf-core/gatk4/baserecalibrator/meta.yml @@ -0,0 +1,105 @@ +name: gatk4_baserecalibrator +description: Generate recalibration table for Base Quality Score Recalibration (BQSR) +keywords: + - base quality score recalibration + - table + - bqsr + - gatk4 + - sort +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome'] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - - meta5: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] + - known_sites: + type: file + description: VCF files with known sites for indels / snps (optional) + pattern: "*.vcf.gz" + - - meta6: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] + - known_sites_tbi: + type: file + description: Tabix index of the known_sites (optional) + pattern: "*.vcf.gz.tbi" +output: + - table: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.table": + type: file + description: Recalibration table from BaseRecalibrator + pattern: "*.{table}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/calculatecontamination/environment.yml b/modules/nf-core/gatk4/calculatecontamination/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/calculatecontamination/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/modules/gatk4/calculatecontamination/main.nf b/modules/nf-core/gatk4/calculatecontamination/main.nf similarity index 53% rename from modules/nf-core/modules/gatk4/calculatecontamination/main.nf rename to modules/nf-core/gatk4/calculatecontamination/main.nf index 177f4878cf..20fe3c5e13 100644 --- a/modules/nf-core/modules/gatk4/calculatecontamination/main.nf +++ b/modules/nf-core/gatk4/calculatecontamination/main.nf @@ -2,14 +2,13 @@ process GATK4_CALCULATECONTAMINATION { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" input: tuple val(meta), path(pileup), path(matched) - val segmentout output: tuple val(meta), path('*.contamination.table'), emit: contamination @@ -22,20 +21,21 @@ process GATK4_CALCULATECONTAMINATION { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def matched_command = matched ? " -matched ${matched} " : '' - def segment_command = segmentout ? " -segments ${prefix}.segmentation.table" : '' - def avail_mem = 3 + def matched_command = matched ? "--matched-normal $matched" : '' + + def avail_mem = 3072 if (!task.memory) { log.info '[GATK CalculateContamination] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" CalculateContamination \\ - -I $pileup \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CalculateContamination \\ + --input $pileup \\ + --output ${prefix}.contamination.table \\ $matched_command \\ - -O ${prefix}.contamination.table \\ - $segment_command \\ + --tmp-dir . 
\\ $args cat <<-END_VERSIONS > versions.yml @@ -43,4 +43,16 @@ process GATK4_CALCULATECONTAMINATION { gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" // 'def' keeps prefix local to this task, as in the script block + """ + touch ${prefix}.contamination.table + touch ${prefix}.segmentation.table + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/gatk4/calculatecontamination/meta.yml b/modules/nf-core/gatk4/calculatecontamination/meta.yml new file mode 100644 index 0000000000..ee90a48252 --- /dev/null +++ b/modules/nf-core/gatk4/calculatecontamination/meta.yml @@ -0,0 +1,69 @@ +name: gatk4_calculatecontamination +description: | + Calculates the fraction of reads from cross-sample contamination based on summary tables from getpileupsummaries. Output to be used with filtermutectcalls. +keywords: + - gatk4 + - calculatecontamination + - cross-samplecontamination + - getpileupsummaries + - filtermutectcalls +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - pileup: + type: file + description: File containing the pileups summary table of a tumor sample to + be used to calculate contamination.
+ pattern: "*.pileups.table" + - matched: + type: file + description: File containing the pileups summary table of a normal sample that + matches with the tumor sample specified in pileup argument. This is an optional + input. + pattern: "*.pileups.table" +output: + - contamination: + - meta: + type: map + description: | + Groovy Map containing sample information, e.g. [ id:'test' ] + - "*.contamination.table": + type: file + description: File containing the contamination table. + pattern: "*.contamination.table" + - segmentation: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.segmentation.table": + type: file + description: output table containing segmentation of tumor minor allele fractions + (optional) + pattern: "*.segmentation.table" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GCJMackenzie" + - "@maxulysse" +maintainers: + - "@GCJMackenzie" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/cnnscorevariants/environment.yml b/modules/nf-core/gatk4/cnnscorevariants/environment.yml new file mode 100644 index 0000000000..da693d88d5 --- /dev/null +++ b/modules/nf-core/gatk4/cnnscorevariants/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/cnnscorevariants/main.nf b/modules/nf-core/gatk4/cnnscorevariants/main.nf new file mode 100644 index 0000000000..5ff13b41d1 --- /dev/null +++ b/modules/nf-core/gatk4/cnnscorevariants/main.nf @@ -0,0 +1,75 @@ +process GATK4_CNNSCOREVARIANTS { + tag "$meta.id" + label 'process_low' + + //Conda is not supported at the moment:
https://github.com/broadinstitute/gatk/issues/7811 + container "nf-core/gatk:4.5.0.0" //Biocontainers is missing a package + + input: + tuple val(meta), path(vcf), path(tbi), path(aligned_input), path(intervals) + path fasta + path fai + path dict + path architecture + path weights + + output: + tuple val(meta), path("*cnn.vcf.gz") , emit: vcf + tuple val(meta), path("*cnn.vcf.gz.tbi"), emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "GATK4_CNNSCOREVARIANTS module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def aligned_input_cmd = aligned_input ? "--input $aligned_input" : "" + def interval_command = intervals ? "--intervals $intervals" : "" + def architecture_cmd = architecture ? "--architecture $architecture" : "" + def weights_cmd = weights ? "--weights $weights" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK CnnScoreVariants] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + export THEANO_FLAGS="base_compiledir=\$PWD" + + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CNNScoreVariants \\ + --variant $vcf \\ + --output ${prefix}.cnn.vcf.gz \\ + --reference $fasta \\ + $interval_command \\ + $aligned_input_cmd \\ + $architecture_cmd \\ + $weights_cmd \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + echo "" | gzip -c > ${prefix}.cnn.vcf.gz + touch ${prefix}.cnn.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/cnnscorevariants/meta.yml b/modules/nf-core/gatk4/cnnscorevariants/meta.yml new file mode 100644 index 0000000000..b55c9d9995 --- /dev/null +++ b/modules/nf-core/gatk4/cnnscorevariants/meta.yml @@ -0,0 +1,88 @@ +name: "gatk4_cnnscorevariants" +description: Apply a Convolutional Neural Net to filter annotated variants +keywords: + - cnnscorevariants + - gatk4 + - variants +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - vcf: + type: file + description: VCF file + pattern: "*.vcf.gz" + - tbi: + type: file + description: VCF index file + pattern: "*.vcf.gz.tbi" + - aligned_input: + type: file + description: BAM/CRAM file from alignment (optional) + pattern: "*.{bam,cram}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - - architecture: + type: file + description: Neural Net architecture configuration json file (optional) + pattern: "*.json" + - - weights: + type: file + description: Keras model HD5 file with neural net weights (optional) + pattern: "*.hd5" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*cnn.vcf.gz": + type: file + description: Annotated VCF file + pattern: "*.vcf.gz" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*cnn.vcf.gz.tbi": + type: file + description: VCF index file + pattern: "*.vcf.gz.tbi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/createsequencedictionary/environment.yml b/modules/nf-core/gatk4/createsequencedictionary/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/createsequencedictionary/main.nf b/modules/nf-core/gatk4/createsequencedictionary/main.nf new file mode 100644 index 0000000000..872648a5e9 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/main.nf @@ -0,0 +1,53 @@ +process GATK4_CREATESEQUENCEDICTIONARY { + tag "${fasta}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd'}" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('*.dict'), emit: dict + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def avail_mem = 6144 + if (!task.memory) { + log.info('[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. 
Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CreateSequenceDictionary \\ + --REFERENCE ${fasta} \\ + --URI ${fasta} \\ + --TMP_DIR . \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta.baseName}.dict + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/gatk4/createsequencedictionary/meta.yml new file mode 100644 index 0000000000..72dced28c8 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/meta.yml @@ -0,0 +1,54 @@ +name: gatk4_createsequencedictionary +description: Creates a sequence dictionary for a reference sequence +keywords: + - createsequencedictionary + - dictionary + - fasta + - gatk4 +tools: + - gatk: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: Input fasta file + pattern: "*.{fasta,fa}" + ontologies: [] +output: + dict: + - - meta: + type: file + description: gatk dictionary file + pattern: "*.{dict}" + ontologies: [] + - "*.dict": + type: file + description: gatk dictionary file + pattern: "*.{dict}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@maxulysse" + - "@ramprasadn" +maintainers: + - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/estimatelibrarycomplexity/environment.yml b/modules/nf-core/gatk4/estimatelibrarycomplexity/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/estimatelibrarycomplexity/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf b/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf new file mode 100644 index 0000000000..9071279577 --- /dev/null +++ b/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf @@ -0,0 +1,61 @@ +process GATK4_ESTIMATELIBRARYCOMPLEXITY { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(input) + path fasta + path fai + path dict + + output: + tuple val(meta), path('*.metrics'), emit: metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_list = input.collect(){"--INPUT $it"}.join(" ") + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK EstimateLibraryComplexity] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + EstimateLibraryComplexity \\ + $input_list \\ + --OUTPUT ${prefix}.metrics \\ + $reference \\ + --TMP_DIR . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/estimatelibrarycomplexity/meta.yml b/modules/nf-core/gatk4/estimatelibrarycomplexity/meta.yml new file mode 100644 index 0000000000..4fb06a3a2e --- /dev/null +++ b/modules/nf-core/gatk4/estimatelibrarycomplexity/meta.yml @@ -0,0 +1,60 @@ +name: gatk4_estimatelibrarycomplexity +description: Estimates the numbers of unique molecules in a sequencing library. 
+keywords: + - duplication metrics + - estimatelibrarycomplexity + - gatk4 + - reporting +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4) + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us + tool_dev_url: https://github.com/broadinstitute/gatk + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - metrics: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.metrics": + type: file + description: File containing metrics on the input files + pattern: "*.metrics" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/filtermutectcalls/environment.yml b/modules/nf-core/gatk4/filtermutectcalls/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/filtermutectcalls/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/filtermutectcalls/main.nf
b/modules/nf-core/gatk4/filtermutectcalls/main.nf new file mode 100644 index 0000000000..d3c5bb5ad7 --- /dev/null +++ b/modules/nf-core/gatk4/filtermutectcalls/main.nf @@ -0,0 +1,71 @@ +process GATK4_FILTERMUTECTCALLS { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(vcf), path(vcf_tbi), path(stats), path(orientationbias), path(segmentation), path(table), val(estimate) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.vcf.gz.tbi") , emit: tbi + tuple val(meta), path("*.filteringStats.tsv"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def orientationbias_command = orientationbias ? orientationbias.collect{"--orientation-bias-artifact-priors $it"}.join(' ') : '' + def segmentation_command = segmentation ? segmentation.collect{"--tumor-segmentation $it"}.join(' ') : '' + def estimate_command = estimate ? " --contamination-estimate ${estimate} " : '' + def table_command = table ? table.collect{"--contamination-table $it"}.join(' ') : '' + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK FilterMutectCalls] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + FilterMutectCalls \\ + --variant $vcf \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + $orientationbias_command \\ + $segmentation_command \\ + $estimate_command \\ + $table_command \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + touch ${prefix}.vcf.gz.filteringStats.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/filtermutectcalls/meta.yml b/modules/nf-core/gatk4/filtermutectcalls/meta.yml new file mode 100644 index 0000000000..9287277eb7 --- /dev/null +++ b/modules/nf-core/gatk4/filtermutectcalls/meta.yml @@ -0,0 +1,123 @@ +name: gatk4_filtermutectcalls +description: | + Filters the raw output of mutect2, can optionally use outputs of calculatecontamination and learnreadorientationmodel to improve filtering. +keywords: + - filtermutectcalls + - filter + - gatk4 + - mutect2 + - vcf +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - vcf: + type: file + description: compressed vcf file of mutect2calls + pattern: "*.vcf.gz" + - vcf_tbi: + type: file + description: Tabix index of vcf file + pattern: "*vcf.gz.tbi" + - stats: + type: file + description: Stats file that pairs with output vcf file + pattern: "*vcf.gz.stats" + - orientationbias: + type: file + description: files containing artifact priors for input vcf. Optional input. + pattern: "*.artifact-prior.tar.gz" + - segmentation: + type: file + description: tables containing segmentation information for input vcf. Optional + input. + pattern: "*.segmentation.table" + - table: + type: file + description: table(s) containing contamination data for input vcf. Optional + input, takes priority over estimate. + pattern: "*.contamination.table" + - estimate: + type: float + description: estimation of contamination value as a double. Optional input, + will only be used if table is not specified. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - vcf: + - meta: + type: file + description: file containing filtered mutect2 calls. + pattern: "*.vcf.gz" + - "*.vcf.gz": + type: file + description: file containing filtered mutect2 calls. + pattern: "*.vcf.gz" + - tbi: + - meta: + type: file + description: tbi file that pairs with vcf. + pattern: "*.vcf.gz.tbi" + - "*.vcf.gz.tbi": + type: file + description: tbi file that pairs with vcf. 
+ pattern: "*.vcf.gz.tbi" + - stats: + - meta: + type: file + description: file containing statistics of the filtermutectcalls run. + pattern: "*.filteringStats.tsv" + - "*.filteringStats.tsv": + type: file + description: file containing statistics of the filtermutectcalls run. + pattern: "*.filteringStats.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GCJMackenzie" + - "@maxulysse" + - "@ramprasadn" +maintainers: + - "@GCJMackenzie" + - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/filtervarianttranches/environment.yml b/modules/nf-core/gatk4/filtervarianttranches/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/filtervarianttranches/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/filtervarianttranches/main.nf b/modules/nf-core/gatk4/filtervarianttranches/main.nf new file mode 100644 index 0000000000..c5249b7a05 --- /dev/null +++ b/modules/nf-core/gatk4/filtervarianttranches/main.nf @@ -0,0 +1,65 @@ +process GATK4_FILTERVARIANTTRANCHES { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(vcf), path(tbi), path(intervals) + path resources + path resources_index + path fasta + path fai + path dict + + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.vcf.gz.tbi"), emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def resource_list = resources.collect{"--resource $it"}.join(' ') + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK FilterVariantTranches] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + FilterVariantTranches \\ + --variant $vcf \\ + $resource_list \\ + --output ${prefix}.filtered.vcf.gz \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + echo "" | gzip -c > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/filtervarianttranches/meta.yml b/modules/nf-core/gatk4/filtervarianttranches/meta.yml new file mode 100644 index 0000000000..398bbb07c1 --- /dev/null +++ b/modules/nf-core/gatk4/filtervarianttranches/meta.yml @@ -0,0 +1,86 @@ +name: "gatk4_filtervarianttranches" +description: Apply tranche filtering +keywords: + - filtervarianttranches + - gatk4 + - tranche filtering +tools: + - "gatk4": + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360051308071-FilterVariantTranches + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: a VCF file containing variants, must have info key:CNN_2D + pattern: "*.vcf.gz" + - tbi: + type: file + description: tbi file matching with -vcf + pattern: "*.vcf.gz.tbi" + - intervals: + type: file + description: Intervals + - - resources: + type: list + description: resource A VCF containing known SNP and or INDEL sites. 
Can be + supplied as many times as necessary + pattern: "*.vcf.gz" + - - resources_index: + type: list + description: Index of resource VCF containing known SNP and/or INDEL sites. + Can be supplied as many times as necessary + pattern: "*.vcf.gz.tbi" + - - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz": + type: file + description: VCF file + pattern: "*.vcf.gz" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz.tbi": + type: file + description: VCF index file + pattern: "*.vcf.gz.tbi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/gatherbqsrreports/environment.yml b/modules/nf-core/gatk4/gatherbqsrreports/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/gatherbqsrreports/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/modules/gatk4/gatherbqsrreports/main.nf b/modules/nf-core/gatk4/gatherbqsrreports/main.nf similarity index 55% rename from modules/nf-core/modules/gatk4/gatherbqsrreports/main.nf rename to modules/nf-core/gatk4/gatherbqsrreports/main.nf index f8d91a920b..fdc5a2a723 
100644 --- a/modules/nf-core/modules/gatk4/gatherbqsrreports/main.nf +++ b/modules/nf-core/gatk4/gatherbqsrreports/main.nf @@ -1,14 +1,14 @@ process GATK4_GATHERBQSRREPORTS { tag "$meta.id" - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" input: - tuple val(meta), path(recal_table) + tuple val(meta), path(table) output: tuple val(meta), path("*.table"), emit: table @@ -20,21 +20,21 @@ process GATK4_GATHERBQSRREPORTS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def input = recal_table.collect{"-I ${it}"}.join(' ') + def input_list = table.collect{"--input $it"}.join(' ') - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[GATK GatherBQSRReports] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" \\ - GatherBQSRReports \ - ${input} \ - --tmp-dir . \ - $args \ - --output ${prefix}.table + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + GatherBQSRReports \\ + $input_list \\ + --output ${prefix}.table \\ + --tmp-dir . 
\\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gatk4/gatherbqsrreports/meta.yml b/modules/nf-core/gatk4/gatherbqsrreports/meta.yml new file mode 100644 index 0000000000..587175b3a4 --- /dev/null +++ b/modules/nf-core/gatk4/gatherbqsrreports/meta.yml @@ -0,0 +1,46 @@ +name: gatk4_gatherbqsrreports +description: Gathers scattered BQSR recalibration reports into a single file +keywords: + - base quality score recalibration + - bqsr + - gatherbqsrreports + - gatk4 +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4) + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us + tool_dev_url: https://github.com/broadinstitute/gatk + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["BSD-3-clause"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - table: + type: file + description: File(s) containing BQSR table(s) + pattern: "*.table" +output: + - table: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.table": + type: file + description: File containing joined BQSR table + pattern: "*.table" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/gatherpileupsummaries/environment.yml b/modules/nf-core/gatk4/gatherpileupsummaries/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/gatherpileupsummaries/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/gatherpileupsummaries/main.nf b/modules/nf-core/gatk4/gatherpileupsummaries/main.nf new file mode 100644 index 0000000000..af397a1a2d --- /dev/null +++ b/modules/nf-core/gatk4/gatherpileupsummaries/main.nf @@ -0,0 +1,58 @@ +process GATK4_GATHERPILEUPSUMMARIES { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + + input: + tuple val(meta), path(pileup) + path dict + + output: + tuple val(meta), path("*.pileups.table"), emit: table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_list = pileup.collect{ "--I $it" }.join(' ') + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK GatherPileupSummaries] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + GatherPileupSummaries \\ + $input_list \\ + --O ${prefix}.pileups.table \\ + --sequence-dictionary $dict \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.pileups.table + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/gatherpileupsummaries/meta.yml b/modules/nf-core/gatk4/gatherpileupsummaries/meta.yml new file mode 100644 index 0000000000..d8b29d2100 --- /dev/null +++ b/modules/nf-core/gatk4/gatherpileupsummaries/meta.yml @@ -0,0 +1,50 @@ +name: gatk4_gatherpileupsummaries +description: Combine scattered pileup summary tables from GetPileupSummaries into a single table, ordered by the sequence dictionary +keywords: + - gatk4 + - mpileup + - sort +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4) + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us + tool_dev_url: https://github.com/broadinstitute/gatk + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["BSD-3-clause"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.pileups.table": + type: file + description: pileup summaries table file + pattern: "*.pileups.table" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/genomicsdbimport/environment.yml b/modules/nf-core/gatk4/genomicsdbimport/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/genomicsdbimport/main.nf b/modules/nf-core/gatk4/genomicsdbimport/main.nf new file mode 100644 index 0000000000..90f1200dc7 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/main.nf @@ -0,0 +1,104 @@ +process GATK4_GENOMICSDBIMPORT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(vcf), path(tbi), path(interval_file), val(interval_value), path(wspace) + val run_intlist + val run_updatewspace + val input_map + + output: + tuple val(meta), path("$prefix") , optional:true, emit: genomicsdb + tuple val(meta), path("$updated_db") , optional:true, emit: updatedb + tuple val(meta), path("*.interval_list"), optional:true, emit: intervallist + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + // settings for running default create gendb mode + input_command = input_map ? "--sample-name-map ${vcf[0]}" : vcf.collect(){"--variant $it"}.join(' ') + + genomicsdb_command = "--genomicsdb-workspace-path ${prefix}" + interval_command = interval_file ? "--intervals ${interval_file}" : "--intervals ${interval_value}" + updated_db = "" + + // settings changed for running get intervals list mode if run_intlist is true + if (run_intlist) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = "--output-interval-list-to-file ${prefix}.interval_list" + } + + // settings changed for running update gendb mode. input_command same as default, update_db forces module to emit the updated gendb + if (run_updatewspace) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = '' + updated_db = "${wspace}" + } + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK GenomicsDBImport] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + GenomicsDBImport \\ + $input_command \\ + $genomicsdb_command \\ + $interval_command \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + + genomicsdb_command = "--genomicsdb-workspace-path ${prefix}" + interval_command = interval_file ? "--intervals ${interval_file}" : "--intervals ${interval_value}" + updated_db = "" + + // settings changed for running get intervals list mode if run_intlist is true + if (run_intlist) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = "--output-interval-list-to-file ${prefix}.interval_list" + } + + // settings changed for running update gendb mode. input_command same as default, update_db forces module to emit the updated gendb + if (run_updatewspace) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = '' + updated_db = "${wspace}" + } + + def stub_genomicsdb = genomicsdb_command == "--genomicsdb-workspace-path ${prefix}" ? "touch ${prefix}" : "" + def stub_interval = interval_command == "--output-interval-list-to-file ${prefix}.interval_list" ? "touch ${prefix}.interval_list" : "" + def stub_update = updated_db != "" ? 
"touch ${wspace}" : "" + + """ + ${stub_genomicsdb} + ${stub_interval} + ${stub_update} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/genomicsdbimport/meta.yml b/modules/nf-core/gatk4/genomicsdbimport/meta.yml new file mode 100644 index 0000000000..ba734b288f --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/meta.yml @@ -0,0 +1,115 @@ +name: gatk4_genomicsdbimport +description: merge GVCFs from multiple samples. For use in joint genotyping or somatic + panel of normal creation. +keywords: + - gatk4 + - genomicsdb + - genomicsdbimport + - jointgenotyping + - panelofnormalscreation +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: list + description: either a list of vcf files to be used to create or update a genomicsdb, + or a file that contains a map to vcf files to be used. 
+ pattern: "*.vcf.gz" + - tbi: + type: list + description: list of tbi files that match with the input vcf files + pattern: "*.vcf.gz.tbi" + - interval_file: + type: file + description: file containing the intervals to be used when creating the genomicsdb + pattern: "*.interval_list" + - interval_value: + type: string + description: if an intervals file has not been specified, the value entered + here will be used as an interval via the "-L" argument + pattern: "example: chr1:1000-10000" + - wspace: + type: file + description: path to an existing genomicsdb to be used in update db mode or + get intervals mode. This WILL NOT specify name of a new genomicsdb in create + db mode. + pattern: "/path/to/existing/gendb" + - - run_intlist: + type: boolean + description: Specify whether to run get interval list mode, this option cannot + be specified at the same time as run_updatewspace. + pattern: "true/false" + - - run_updatewspace: + type: boolean + description: Specify whether to run update genomicsdb mode, this option takes + priority over run_intlist. + pattern: "true/false" + - - input_map: + type: boolean + description: Specify whether the vcf input is providing a list of vcf file(s) + or a single file containing a map of paths to vcf files to be used to create + or update a genomicsdb. 
+ pattern: "*.sample_map" +output: + - genomicsdb: + - meta: + type: directory + description: Directory containing the files that compose the genomicsdb workspace, + this is only output for create mode, as update changes an existing db + pattern: "*/$prefix" + - $prefix: + type: directory + description: Directory containing the files that compose the genomicsdb workspace, + this is only output for create mode, as update changes an existing db + pattern: "*/$prefix" + - updatedb: + - meta: + type: directory + description: Directory containing the files that compose the updated genomicsdb + workspace, this is only output for update mode, and should be the same path + as the input wspace. + pattern: "same/path/as/wspace" + - $updated_db: + type: directory + description: Directory containing the files that compose the updated genomicsdb + workspace, this is only output for update mode, and should be the same path + as the input wspace. + pattern: "same/path/as/wspace" + - intervallist: + - meta: + type: file + description: File containing the intervals used to generate the genomicsdb, + only created by get intervals mode. + pattern: "*.interval_list" + - "*.interval_list": + type: file + description: File containing the intervals used to generate the genomicsdb, + only created by get intervals mode. + pattern: "*.interval_list" + - list: + type: file + description: File containing the intervals used to generate the genomicsdb, + only created by get intervals mode. 
+ pattern: "*.interval_list" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/genotypegvcfs/environment.yml b/modules/nf-core/gatk4/genotypegvcfs/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/genotypegvcfs/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/genotypegvcfs/main.nf b/modules/nf-core/gatk4/genotypegvcfs/main.nf new file mode 100644 index 0000000000..dc2813a350 --- /dev/null +++ b/modules/nf-core/gatk4/genotypegvcfs/main.nf @@ -0,0 +1,68 @@ +process GATK4_GENOTYPEGVCFS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(input), path(gvcf_index), path(intervals), path(intervals_index) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + tuple val(meta5), path(dbsnp) + tuple val(meta6), path(dbsnp_tbi) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_command = input.name.endsWith(".vcf") || input.name.endsWith(".vcf.gz") ? "$input" : "gendb://$input" + def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : "" + def interval_command = intervals ? "--intervals $intervals" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK GenotypeGVCFs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + GenotypeGVCFs \\ + --variant $input_command \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + $interval_command \\ + $dbsnp_command \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + echo | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/genotypegvcfs/meta.yml b/modules/nf-core/gatk4/genotypegvcfs/meta.yml new file mode 100644 index 0000000000..0c1fe491fe --- /dev/null +++ b/modules/nf-core/gatk4/genotypegvcfs/meta.yml @@ -0,0 +1,117 @@ +name: gatk4_genotypegvcfs +description: | + Perform joint genotyping on one or more samples pre-called with HaplotypeCaller. +keywords: + - gatk4 + - genotype + - gvcf + - joint genotyping +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4) + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + tool_dev_url: https://github.com/broadinstitute/gatk + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["BSD-3-clause"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: | + gVCF(.gz) file or a GenomicsDB + pattern: "*.{vcf,vcf.gz}" + - gvcf_index: + type: file + description: | + index of gvcf file, or empty when providing GenomicsDB + pattern: "*.{idx,tbi}" + - intervals: + type: file + description: Interval file with the genomic regions included in the library + (optional) + - intervals_index: + type: file + description: Interval index file (optional) + - - meta2: + type: map + description: | + Groovy Map containing fasta information + e.g. 
[ id:'test' ] + - fasta: + type: file + description: Reference fasta file + pattern: "*.fasta" + - - meta3: + type: map + description: | + Groovy Map containing fai information + e.g. [ id:'test' ] + - fai: + type: file + description: Reference fasta index file + pattern: "*.fai" + - - meta4: + type: map + description: | + Groovy Map containing dict information + e.g. [ id:'test' ] + - dict: + type: file + description: Reference fasta sequence dict file + pattern: "*.dict" + - - meta5: + type: map + description: | + Groovy Map containing dbsnp information + e.g. [ id:'test' ] + - dbsnp: + type: file + description: dbSNP VCF file + pattern: "*.vcf.gz" + - - meta6: + type: map + description: | + Groovy Map containing dbsnp tbi information + e.g. [ id:'test' ] + - dbsnp_tbi: + type: file + description: dbSNP VCF index file + pattern: "*.tbi" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz": + type: file + description: Genotyped VCF file + pattern: "*.vcf.gz" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Tbi index for VCF file + pattern: "*.tbi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@santiagorevale" + - "@maxulysse" +maintainers: + - "@santiagorevale" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/getpileupsummaries/environment.yml b/modules/nf-core/gatk4/getpileupsummaries/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/getpileupsummaries/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/getpileupsummaries/main.nf b/modules/nf-core/gatk4/getpileupsummaries/main.nf new file mode 100644 index 0000000000..41fd312811 --- /dev/null +++ b/modules/nf-core/gatk4/getpileupsummaries/main.nf @@ -0,0 +1,64 @@ +process GATK4_GETPILEUPSUMMARIES { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(input), path(index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + path variants + path variants_tbi + + output: + tuple val(meta), path('*.pileups.table'), emit: table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval_command = intervals ? "--intervals $intervals" : "--intervals $variants" + def reference_command = fasta ? "--reference $fasta" : '' + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK GetPileupSummaries] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + GetPileupSummaries \\ + --input $input \\ + --variant $variants \\ + --output ${prefix}.pileups.table \\ + $reference_command \\ + $interval_command \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.pileups.table + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/getpileupsummaries/meta.yml b/modules/nf-core/gatk4/getpileupsummaries/meta.yml new file mode 100644 index 0000000000..86b851e13a --- /dev/null +++ b/modules/nf-core/gatk4/getpileupsummaries/meta.yml @@ -0,0 +1,94 @@ +name: gatk4_getpileupsummaries +description: | + Summarizes counts of reads that support reference, alternate and other alleles for given sites. Results can be used with CalculateContamination. Requires a common germline variant sites file, such as from gnomAD. +keywords: + - gatk4 + - germlinevariantsites + - getpileupsummaries + - readcountssummary +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - input: + type: file + description: BAM/CRAM file to be summarised. 
+ pattern: "*.{bam.bai,cram.crai}" + - intervals: + type: file + description: File containing specified sites to be used for the summary. If + this option is not specified, variants file is used instead automatically. + pattern: "*.interval_list" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - - variants: + type: file + description: Population vcf of germline sequencing, containing allele fractions. + Is also used as sites file if no separate sites file is specified. + pattern: "*.vcf.gz" + - - variants_tbi: + type: file + description: Index file for the germline resource. + pattern: "*.vcf.gz.tbi" +output: + - table: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - "*.pileups.table": + type: file + description: Table containing read counts for each site. 
+ pattern: "*.pileups.table" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/haplotypecaller/environment.yml b/modules/nf-core/gatk4/haplotypecaller/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/haplotypecaller/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/haplotypecaller/main.nf b/modules/nf-core/gatk4/haplotypecaller/main.nf new file mode 100644 index 0000000000..1ef76789de --- /dev/null +++ b/modules/nf-core/gatk4/haplotypecaller/main.nf @@ -0,0 +1,77 @@ +process GATK4_HAPLOTYPECALLER { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(input), path(input_index), path(intervals), path(dragstr_model) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + tuple val(meta5), path(dbsnp) + tuple val(meta6), path(dbsnp_tbi) + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.tbi") , optional:true, emit: tbi + tuple val(meta), path("*.realigned.bam"), optional:true, emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : "" + def interval_command = intervals ? "--intervals $intervals" : "" + def dragstr_command = dragstr_model ? "--dragstr-params-path $dragstr_model" : "" + def bamout_command = args.contains("--bam-writer-type") ? "--bam-output ${prefix.replaceAll('.g\\s*$', '')}.realigned.bam" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + HaplotypeCaller \\ + --input $input \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + --native-pair-hmm-threads ${task.cpus} \\ + $dbsnp_command \\ + $interval_command \\ + $dragstr_command \\ + $bamout_command \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def bamout_command = args.contains("--bam-writer-type") ? "--bam-output ${prefix.replaceAll('.g\\s*$', '')}.realigned.bam" : "" + + def stub_realigned_bam = bamout_command ? "touch ${prefix.replaceAll('.g\\s*$', '')}.realigned.bam" : "" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + ${stub_realigned_bam} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/haplotypecaller/meta.yml b/modules/nf-core/gatk4/haplotypecaller/meta.yml new file mode 100644 index 0000000000..9d4a05e914 --- /dev/null +++ b/modules/nf-core/gatk4/haplotypecaller/meta.yml @@ -0,0 +1,124 @@ +name: gatk4_haplotypecaller +description: Call germline SNPs and indels via local re-assembly of haplotypes +keywords: + - gatk4 + - haplotype + - haplotypecaller +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - dragstr_model: + type: file + description: Text file containing the DragSTR model of the used BAM/CRAM file + (optional) + pattern: "*.txt" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_reference' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_reference' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_reference' ] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - - meta5: + type: map + description: | + Groovy Map containing dbsnp information + e.g. [ id:'test_dbsnp' ] + - dbsnp: + type: file + description: VCF file containing known sites (optional) + - - meta6: + type: map + description: | + Groovy Map containing dbsnp information + e.g. [ id:'test_dbsnp' ] + - dbsnp_tbi: + type: file + description: VCF index of dbsnp (optional) +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz": + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Index of VCF file + pattern: "*.vcf.gz.tbi" + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.realigned.bam": + type: file + description: Assembled haplotypes and locally realigned reads + pattern: "*.realigned.bam" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@suzannejin" + - "@FriederikeHanssen" +maintainers: + - "@suzannejin" + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/intervallisttobed/environment.yml b/modules/nf-core/gatk4/intervallisttobed/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttobed/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/intervallisttobed/main.nf b/modules/nf-core/gatk4/intervallisttobed/main.nf new file mode 100644 index 0000000000..a7b05edbfc --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttobed/main.nf @@ -0,0 +1,56 @@ +process GATK4_INTERVALLISTTOBED { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd'}" + + input: + tuple val(meta), path(intervals) + + output: + tuple val(meta), path("${prefix}.bed"), emit: bed + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK IntervalListToBed] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + IntervalListToBed \\ + --INPUT ${intervals} \\ + --OUTPUT ${prefix}.bed \\ + --TMP_DIR . \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/intervallisttobed/meta.yml b/modules/nf-core/gatk4/intervallisttobed/meta.yml new file mode 100644 index 0000000000..f151daafdb --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttobed/meta.yml @@ -0,0 +1,47 @@ +name: gatk4_intervallisttobed +description: Converts a Picard IntervalList file to a BED file.
+keywords: + - bed + - conversion + - gatk4 + - interval +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4) + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + tool_dev_url: https://github.com/broadinstitute/gatk + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["BSD-3-clause"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals: + type: file + description: IntervalList file +output: + - bed: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bed: + type: file + description: | + BED file containing the intervals converted + from the input IntervalList file + pattern: "${prefix}.bed" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/learnreadorientationmodel/environment.yml b/modules/nf-core/gatk4/learnreadorientationmodel/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/learnreadorientationmodel/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/modules/gatk4/learnreadorientationmodel/main.nf b/modules/nf-core/gatk4/learnreadorientationmodel/main.nf similarity index 62% rename from modules/nf-core/modules/gatk4/learnreadorientationmodel/main.nf rename to modules/nf-core/gatk4/learnreadorientationmodel/main.nf index 7d96c27e0a..86e7daaa6c 100644
--- a/modules/nf-core/modules/gatk4/learnreadorientationmodel/main.nf +++ b/modules/nf-core/gatk4/learnreadorientationmodel/main.nf @@ -2,10 +2,10 @@ process GATK4_LEARNREADORIENTATIONMODEL { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" input: tuple val(meta), path(f1r2) @@ -20,19 +20,20 @@ process GATK4_LEARNREADORIENTATIONMODEL { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def inputs_list = [] - f1r2.each() { a -> inputs_list.add(" -I " + a) } - def avail_mem = 3 + def input_list = f1r2.collect{"--input $it"}.join(' ') + + def avail_mem = 3072 if (!task.memory) { log.info '[GATK LearnReadOrientationModel] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ LearnReadOrientationModel \\ - ${inputs_list.join(' ')} \\ - -O ${prefix}.tar.gz \\ + $input_list \\ + --output ${prefix}.tar.gz \\ + --tmp-dir . 
\\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/modules/gatk4/learnreadorientationmodel/meta.yml b/modules/nf-core/gatk4/learnreadorientationmodel/meta.yml similarity index 52% rename from modules/nf-core/modules/gatk4/learnreadorientationmodel/meta.yml rename to modules/nf-core/gatk4/learnreadorientationmodel/meta.yml index 4eff693923..fde7829c8d 100644 --- a/modules/nf-core/modules/gatk4/learnreadorientationmodel/meta.yml +++ b/modules/nf-core/gatk4/learnreadorientationmodel/meta.yml @@ -4,8 +4,8 @@ description: | keywords: - gatk4 - learnreadorientationmodel - - readorientationartifacts - mutect2 + - readorientationartifacts tools: - gatk4: description: | @@ -15,28 +15,34 @@ tools: homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 - licence: ['Apache-2.0'] - + licence: ["Apache-2.0"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - f1r2: - type: list - description: list of f1r2 files to be used as input. - pattern: "*.f1r2.tar.gz" - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - f1r2: + type: list + description: list of f1r2 files to be used as input. 
+ pattern: "*.f1r2.tar.gz" output: - artifactprior: - type: file - description: file containing artifact-priors to be used by filtermutectcalls - pattern: "*.tar.gz" + - meta: + type: file + description: file containing artifact-priors to be used by filtermutectcalls + pattern: "*.tar.gz" + - "*.tar.gz": + type: file + description: file containing artifact-priors to be used by filtermutectcalls + pattern: "*.tar.gz" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/markduplicates/environment.yml b/modules/nf-core/gatk4/markduplicates/environment.yml new file mode 100644 index 0000000000..ca0fc1923a --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/environment.yml @@ -0,0 +1,12 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/gatk4/markduplicates/main.nf b/modules/nf-core/gatk4/markduplicates/main.nf new file mode 100644 index 0000000000..f4bd896bf3 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/main.nf @@ -0,0 +1,86 @@ +process GATK4_MARKDUPLICATES { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/92/927ff9bb80d65b425cbe752db6648a84043feff6e8ca90e60f9ff6ddbe8938d5/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel_htslib_samtools:c1e4292d6ee27439'}" + + input: + tuple val(meta), path(bam) + path fasta + path fasta_fai + + output: + tuple val(meta), path("*cram"), emit: cram, optional: true + tuple val(meta), path("*bam"), emit: bam, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.bai"), emit: bai, optional: true + tuple val(meta), path("*.metrics"), emit: metrics + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.bam" + + // If the extension is CRAM, then change it to BAM + prefix_bam = prefix.tokenize('.')[-1] == 'cram' ? "${prefix.substring(0, prefix.lastIndexOf('.'))}.bam" : prefix + + def input_list = bam.collect { "--INPUT ${it}" }.join(' ') + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + + // Using samtools and not Markduplicates to compress to CRAM speeds up computation: + // https://medium.com/@acarroll.dna/looking-at-trade-offs-in-compression-levels-for-genomics-tools-eec2834e8b94 + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + MarkDuplicates \\ + ${input_list} \\ + --OUTPUT ${prefix_bam} \\ + --METRICS_FILE ${prefix}.metrics \\ + --TMP_DIR . 
\\ + ${reference} \\ + ${args} + + # If cram files are wished as output, the run samtools for conversion + if [[ ${prefix} == *.cram ]]; then + samtools view -Ch -T ${fasta} -o ${prefix} ${prefix_bam} + rm ${prefix_bam} + samtools index ${prefix} + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.bam" + prefix_no_suffix = task.ext.prefix ? prefix.tokenize('.')[0] : "${meta.id}" + """ + touch ${prefix_no_suffix}.bam + touch ${prefix_no_suffix}.cram + touch ${prefix_no_suffix}.cram.crai + touch ${prefix_no_suffix}.bai + touch ${prefix}.metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/markduplicates/meta.yml b/modules/nf-core/gatk4/markduplicates/meta.yml new file mode 100644 index 0000000000..4772c5f39a --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/meta.yml @@ -0,0 +1,102 @@ +name: gatk4_markduplicates +description: This tool locates and tags duplicate reads in a BAM or SAM file, where + duplicate reads are defined as originating from a single fragment of DNA. +keywords: + - bam + - gatk4 + - markduplicates + - sort +tools: + - gatk4: + description: Developed in the Data Sciences Platform at the Broad Institute, the + toolkit offers a wide variety of tools with a primary focus on variant discovery + and genotyping. Its powerful processing engine and high-performance computing + features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - - fasta: + type: file + description: Fasta file + pattern: "*.{fasta}" + - - fasta_fai: + type: file + description: Fasta index file + pattern: "*.{fai}" +output: + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*cram": + type: file + description: Marked duplicates CRAM file + pattern: "*.{cram}" + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*bam": + type: file + description: Marked duplicates BAM file + pattern: "*.{bam}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: CRAM index file + pattern: "*.{cram.crai}" + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM index file + pattern: "*.{bam.bai}" + - metrics: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.metrics": + type: file + description: Duplicate metrics file generated by GATK + pattern: "*.metrics" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/mergemutectstats/environment.yml b/modules/nf-core/gatk4/mergemutectstats/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/mergemutectstats/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/mergemutectstats/main.nf b/modules/nf-core/gatk4/mergemutectstats/main.nf new file mode 100644 index 0000000000..e6ddc6994c --- /dev/null +++ b/modules/nf-core/gatk4/mergemutectstats/main.nf @@ -0,0 +1,55 @@ +process GATK4_MERGEMUTECTSTATS { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(stats) + + output: + tuple val(meta), path("*.vcf.gz.stats"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_list = stats.collect{ "--stats ${it}"}.join(' ') + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK MergeMutectStats] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + MergeMutectStats \\ + $input_list \\ + --output ${prefix}.vcf.gz.stats \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/mergemutectstats/meta.yml b/modules/nf-core/gatk4/mergemutectstats/meta.yml new file mode 100644 index 0000000000..09c8a54720 --- /dev/null +++ b/modules/nf-core/gatk4/mergemutectstats/meta.yml @@ -0,0 +1,46 @@ +name: gatk4_mergemutectstats +description: Merges mutect2 stats generated on different intervals/regions +keywords: + - gatk4 + - merge + - mutect2 + - mutectstats +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4) + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: 
https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + tool_dev_url: https://github.com/broadinstitute/gatk + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["BSD-3-clause"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - stats: + type: file + description: Stats file + pattern: "*.{stats}" +output: + - stats: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz.stats": + type: file + description: Stats file + pattern: "*.vcf.gz.stats" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/mergevcfs/environment.yml b/modules/nf-core/gatk4/mergevcfs/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/mergevcfs/main.nf b/modules/nf-core/gatk4/mergevcfs/main.nf new file mode 100644 index 0000000000..1752f48a60 --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/main.nf @@ -0,0 +1,60 @@ +process GATK4_MERGEVCFS { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(vcf) + tuple val(meta2), path(dict) + + output: + tuple val(meta), path('*.vcf.gz'), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_list = vcf.collect{ "--INPUT $it"}.join(' ') + def reference_command = dict ? "--SEQUENCE_DICTIONARY $dict" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK MergeVcfs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + MergeVcfs \\ + $input_list \\ + --OUTPUT ${prefix}.vcf.gz \\ + $reference_command \\ + --TMP_DIR . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/mergevcfs/meta.yml b/modules/nf-core/gatk4/mergevcfs/meta.yml new file mode 100644 index 0000000000..b4f61d780d --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/meta.yml @@ -0,0 +1,64 @@ +name: gatk4_mergevcfs +description: Merges several vcf files +keywords: + - gatk4 + - merge + - vcf +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: list + description: Two or more VCF files + pattern: "*.{vcf,vcf.gz}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome'] + - dict: + type: file + description: Optional Sequence Dictionary as input + pattern: "*.dict" +output: + - vcf: + - meta: + type: file + description: merged vcf file + pattern: "*.vcf.gz" + - "*.vcf.gz": + type: file + description: merged vcf file + pattern: "*.vcf.gz" + - tbi: + - meta: + type: file + description: index files for the merged vcf files + pattern: "*.tbi" + - "*.tbi": + type: file + description: index files for the merged vcf files + pattern: "*.tbi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" +maintainers: + - "@kevinmenden" diff --git a/modules/nf-core/gatk4/mutect2/environment.yml b/modules/nf-core/gatk4/mutect2/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/mutect2/main.nf b/modules/nf-core/gatk4/mutect2/main.nf new file mode 100644 index 0000000000..756dfca942 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/main.nf @@ -0,0 +1,75 @@ +process GATK4_MUTECT2 { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(input), path(input_index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + path(germline_resource) + path(germline_resource_tbi) + path(panel_of_normals) + path(panel_of_normals_tbi) + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + tuple val(meta), path("*.stats") , emit: stats + tuple val(meta), path("*.f1r2.tar.gz"), optional:true, emit: f1r2 + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def inputs = input.collect{ "--input $it"}.join(" ") + def interval_command = intervals ? "--intervals $intervals" : "" + def pon_command = panel_of_normals ? "--panel-of-normals $panel_of_normals" : "" + def gr_command = germline_resource ? "--germline-resource $germline_resource" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + Mutect2 \\ + $inputs \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + $pon_command \\ + $gr_command \\ + $interval_command \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + touch ${prefix}.vcf.gz.stats + echo "" | gzip > ${prefix}.f1r2.tar.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/mutect2/meta.yml b/modules/nf-core/gatk4/mutect2/meta.yml new file mode 100644 index 0000000000..27fd63a243 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/meta.yml @@ -0,0 +1,132 @@ +name: gatk4_mutect2 +description: Call somatic SNVs and indels via local assembly of haplotypes. +keywords: + - gatk4 + - haplotype + - indels + - mutect2 + - snvs + - somatic +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - input: + type: list + description: list of BAM files, also able to take CRAM as an input + pattern: "*.{bam/cram}" + - input_index: + type: list + description: list of BAM file indexes, also able to take CRAM indexes as an + input + pattern: "*.{bam.bai/cram.crai}" + - intervals: + type: file + description: Specify region the tools is run on. 
+ pattern: "*.{bed,interval_list}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - - germline_resource: + type: file + description: Population vcf of germline sequencing, containing allele fractions. + pattern: "*.vcf.gz" + - - germline_resource_tbi: + type: file + description: Index file for the germline resource. + pattern: "*.vcf.gz.tbi" + - - panel_of_normals: + type: file + description: vcf file to be used as a panel of normals. + pattern: "*.vcf.gz" + - - panel_of_normals_tbi: + type: file + description: Index for the panel of normals. 
+ pattern: "*.vcf.gz.tbi" +output: + - vcf: + - meta: + type: file + description: compressed vcf file + pattern: "*.vcf.gz" + - "*.vcf.gz": + type: file + description: compressed vcf file + pattern: "*.vcf.gz" + - tbi: + - meta: + type: file + description: Index of vcf file + pattern: "*vcf.gz.tbi" + - "*.tbi": + type: file + description: Index of vcf file + pattern: "*vcf.gz.tbi" + - stats: + - meta: + type: file + description: Stats file that pairs with output vcf file + pattern: "*vcf.gz.stats" + - "*.stats": + type: file + description: Stats file that pairs with output vcf file + pattern: "*vcf.gz.stats" + - f1r2: + - meta: + type: file + description: file containing information to be passed to LearnReadOrientationModel + (only outputted when tumor_normal_pair mode is run) + pattern: "*.f1r2.tar.gz" + - "*.f1r2.tar.gz": + type: file + description: file containing information to be passed to LearnReadOrientationModel + (only outputted when tumor_normal_pair mode is run) + pattern: "*.f1r2.tar.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GCJMackenzie" + - "@ramprasadn" +maintainers: + - "@GCJMackenzie" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/variantrecalibrator/environment.yml b/modules/nf-core/gatk4/variantrecalibrator/environment.yml new file mode 100644 index 0000000000..b562b72c74 --- /dev/null +++ b/modules/nf-core/gatk4/variantrecalibrator/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/variantrecalibrator/main.nf b/modules/nf-core/gatk4/variantrecalibrator/main.nf new file mode 100644 index 0000000000..3c6048f4ba --- /dev/null +++ 
b/modules/nf-core/gatk4/variantrecalibrator/main.nf @@ -0,0 +1,71 @@ +process GATK4_VARIANTRECALIBRATOR { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + + input: + tuple val(meta), path(vcf), path(tbi) // input vcf and tbi of variants to recalibrate + path resource_vcf // resource vcf + path resource_tbi // resource tbi + val labels // string (or list of strings) containing dedicated resource labels already formatted with '--resource:' tag + path fasta + path fai + path dict + + output: + tuple val(meta), path("*.recal") , emit: recal + tuple val(meta), path("*.idx") , emit: idx + tuple val(meta), path("*.tranches"), emit: tranches + tuple val(meta), path("*plots.R") , emit: plots, optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference_command = fasta ? "--reference $fasta " : '' + def labels_command = labels.join(' ') + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK VariantRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + VariantRecalibrator \\ + --variant $vcf \\ + --output ${prefix}.recal \\ + --tranches-file ${prefix}.tranches \\ + $reference_command \\ + --tmp-dir . 
\\ + $labels_command \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.recal + touch ${prefix}.idx + touch ${prefix}.tranches + touch ${prefix}plots.R + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/variantrecalibrator/meta.yml b/modules/nf-core/gatk4/variantrecalibrator/meta.yml new file mode 100644 index 0000000000..72fcfd601c --- /dev/null +++ b/modules/nf-core/gatk4/variantrecalibrator/meta.yml @@ -0,0 +1,112 @@ +name: gatk4_variantrecalibrator +description: | + Build a recalibration model to score variant quality for filtering purposes. + It is highly recommended to follow GATK best practices when using this module, + the gaussian mixture model requires a large number of samples to be used for the + tool to produce optimal results. For example, 30 samples for exome data. For more details see + https://gatk.broadinstitute.org/hc/en-us/articles/4402736812443-Which-training-sets-arguments-should-I-use-for-running-VQSR- +keywords: + - gatk4 + - recalibration model + - variantrecalibrator +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - vcf: + type: file + description: input vcf file containing the variants to be recalibrated + pattern: "*.vcf.gz" + - tbi: + type: file + description: tbi file matching with -vcf + pattern: "*.vcf.gz.tbi" + - - resource_vcf: + type: file + description: all resource vcf files that are used with the corresponding '--resource' + label + pattern: "*.vcf.gz" + - - resource_tbi: + type: file + description: all resource tbi files that are used with the corresponding '--resource' + label + pattern: "*.vcf.gz.tbi" + - - labels: + type: string + description: necessary arguments for GATK VariantRecalibrator. Specified to + directly match the resources provided. More information can be found at + https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator + - - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - recal: + - meta: + type: file + description: Output recal file used by ApplyVQSR + pattern: "*.recal" + - "*.recal": + type: file + description: Output recal file used by ApplyVQSR + pattern: "*.recal" + - idx: + - meta: + type: file + description: Index file for the recal output file + pattern: "*.idx" + - "*.idx": + type: file + description: Index file for the recal output file + pattern: "*.idx" + - tranches: + - meta: + type: file + description: Output tranches file used by ApplyVQSR + pattern: "*.tranches" + - "*.tranches": + type: file + description: Output tranches file used by ApplyVQSR + pattern: "*.tranches" + - plots: + - meta: + type: file + description: Optional output rscript file to aid in visualization of the input + data and learned model. + pattern: "*plots.R" + - "*plots.R": + type: file + description: Optional output rscript file to aid in visualization of the input + data and learned model. 
+ pattern: "*plots.R" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GCJMackenzie" + - "@nickhsmith" +maintainers: + - "@GCJMackenzie" + - "@nickhsmith" diff --git a/modules/nf-core/gatk4spark/applybqsr/environment.yml b/modules/nf-core/gatk4spark/applybqsr/environment.yml new file mode 100644 index 0000000000..a5c49e9557 --- /dev/null +++ b/modules/nf-core/gatk4spark/applybqsr/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gatk4-spark=4.6.1.0 diff --git a/modules/nf-core/gatk4spark/applybqsr/main.nf b/modules/nf-core/gatk4spark/applybqsr/main.nf new file mode 100644 index 0000000000..0c798f1ad7 --- /dev/null +++ b/modules/nf-core/gatk4spark/applybqsr/main.nf @@ -0,0 +1,71 @@ +process GATK4SPARK_APPLYBQSR { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/gatk4-spark:4.6.1.0--hdfd78af_0' + : 'biocontainers/gatk4-spark:4.6.1.0--hdfd78af_0'}" + + input: + tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) + path fasta + path fai + path dict + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}*bai"), emit: bai, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + // suffix can only be bam or cram, cram being the sensible default + def suffix = task.ext.suffix && task.ext.suffix == "bam" ? 
"bam" : "cram" + def interval_command = intervals ? "--intervals ${intervals}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK ApplyBQSRSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + ApplyBQSRSpark \\ + --input ${input} \\ + --output ${prefix}.${suffix} \\ + --reference ${fasta} \\ + --bqsr-recal-file ${bqsr_table} \\ + ${interval_command} \\ + --spark-master local[${task.cpus}] \\ + --tmp-dir . \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "cram" + """ + touch ${prefix}.${suffix} + if [[ ${suffix} == bam ]]; then + touch ${prefix}.${suffix}.bai + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4spark/applybqsr/meta.yml b/modules/nf-core/gatk4spark/applybqsr/meta.yml new file mode 100644 index 0000000000..da7bf56808 --- /dev/null +++ b/modules/nf-core/gatk4spark/applybqsr/meta.yml @@ -0,0 +1,107 @@ +name: gatk4spark_applybqsr +description: Apply base quality score recalibration (BQSR) to a bam file +keywords: + - bam + - base quality score recalibration + - bqsr + - cram + - gatk4spark +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + ontologies: [] + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + ontologies: [] + - bqsr_table: + type: file + description: Recalibration table from gatk4_baserecalibrator + ontologies: [] + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + ontologies: [] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + ontologies: [] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + ontologies: [] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + ontologies: [] +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: Recalibrated BAM file + pattern: "${prefix}.bam" + ontologies: [] + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}*bai: + type: file + description: Recalibrated BAM index file + pattern: "${prefix}*bai" + ontologies: [] + cram: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: Recalibrated CRAM file + pattern: "${prefix}.cram" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4spark/baserecalibrator/environment.yml b/modules/nf-core/gatk4spark/baserecalibrator/environment.yml new file mode 100644 index 0000000000..a5c49e9557 --- /dev/null +++ b/modules/nf-core/gatk4spark/baserecalibrator/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gatk4-spark=4.6.1.0 diff --git a/modules/nf-core/gatk4spark/baserecalibrator/main.nf b/modules/nf-core/gatk4spark/baserecalibrator/main.nf new file mode 100644 index 0000000000..1f9e2cb595 --- /dev/null +++ b/modules/nf-core/gatk4spark/baserecalibrator/main.nf @@ -0,0 +1,67 @@ +process GATK4SPARK_BASERECALIBRATOR { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/gatk4-spark:4.6.1.0--hdfd78af_0' + : 'biocontainers/gatk4-spark:4.6.1.0--hdfd78af_0'}" + + input: + tuple val(meta), path(input), path(input_index), path(intervals) + path fasta + path fai + path dict + path known_sites + path known_sites_tbi + + output: + tuple val(meta), path("*.table"), emit: table + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval_command = intervals ? "--intervals ${intervals}" : "" + def sites_command = known_sites.collect { "--known-sites ${it}" }.join(' ') + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK BaseRecalibratorSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + BaseRecalibratorSpark \\ + --input ${input} \\ + --output ${prefix}.table \\ + --reference ${fasta} \\ + ${interval_command} \\ + ${sites_command} \\ + --spark-master local[${task.cpus}] \\ + --tmp-dir . 
\\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.table + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4spark/baserecalibrator/meta.yml b/modules/nf-core/gatk4spark/baserecalibrator/meta.yml new file mode 100644 index 0000000000..abb0e1a65e --- /dev/null +++ b/modules/nf-core/gatk4spark/baserecalibrator/meta.yml @@ -0,0 +1,80 @@ +name: gatk4spark_baserecalibrator +description: Generate recalibration table for Base Quality Score Recalibration (BQSR) +keywords: + - base quality score recalibration + - table + - bqsr + - gatk4spark + - sort +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - - known_sites: + type: file + description: VCF files with known sites for indels / snps (optional) + pattern: "*.vcf.gz" + - - known_sites_tbi: + type: file + description: Tabix index of the known_sites (optional) + pattern: "*.vcf.gz.tbi" +output: + - table: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.table": + type: file + description: Recalibration table from BaseRecalibrator + pattern: "*.{table}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4spark/markduplicates/environment.yml b/modules/nf-core/gatk4spark/markduplicates/environment.yml new file mode 100644 index 0000000000..a5c49e9557 --- /dev/null +++ b/modules/nf-core/gatk4spark/markduplicates/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gatk4-spark=4.6.1.0 diff --git a/modules/nf-core/gatk4spark/markduplicates/main.nf b/modules/nf-core/gatk4spark/markduplicates/main.nf new file mode 100644 index 
0000000000..a0dcf3fee4 --- /dev/null +++ b/modules/nf-core/gatk4spark/markduplicates/main.nf @@ -0,0 +1,65 @@ +process GATK4SPARK_MARKDUPLICATES { + tag "${meta.id}" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/gatk4-spark:4.6.1.0--hdfd78af_0' + : 'biocontainers/gatk4-spark:4.6.1.0--hdfd78af_0'}" + + input: + tuple val(meta), path(bam) + path fasta + path fasta_fai + path dict + + output: + tuple val(meta), path("${prefix}"), emit: output + tuple val(meta), path("${prefix}.bai"), emit: bam_index, optional: true + tuple val(meta), path("*.metrics"), emit: metrics, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.bam" + def input_list = bam.collect { "--input ${it}" }.join(' ') + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + MarkDuplicatesSpark \\ + ${input_list} \\ + --output ${prefix} \\ + --reference ${fasta} \\ + --spark-master local[${task.cpus}] \\ + --tmp-dir . 
\\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.bam" + """ + touch ${prefix} + touch ${prefix}.bai + touch ${prefix}.metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4spark/markduplicates/meta.yml b/modules/nf-core/gatk4spark/markduplicates/meta.yml new file mode 100644 index 0000000000..fc8dee3dff --- /dev/null +++ b/modules/nf-core/gatk4spark/markduplicates/meta.yml @@ -0,0 +1,88 @@ +name: gatk4spark_markduplicates +description: This tool locates and tags duplicate reads in a BAM or SAM file, where + duplicate reads are defined as originating from a single fragment of DNA. +keywords: + - bam + - gatk4spark + - markduplicates + - sort +tools: + - gatk4: + description: Developed in the Data Sciences Platform at the Broad Institute, the + toolkit offers a wide variety of tools with a primary focus on variant discovery + and genotyping. Its powerful processing engine and high-performance computing + features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - fasta_fai: + type: file + description: Index of reference fasta file + pattern: "*.fai" + - - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}: + type: file + description: Marked duplicates BAM/CRAM file + pattern: "*.{bam,cram}" + - bam_index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bai: + type: file + description: Optional BAM index file + pattern: "*.bai" + - metrics: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.metrics": + type: file + description: Metrics file + pattern: "*.metrics" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" + - "@SusiJo" +maintainers: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" + - "@SusiJo" diff --git a/modules/nf-core/gawk/environment.yml b/modules/nf-core/gawk/environment.yml new file mode 100644 index 0000000000..f52109e83b --- /dev/null +++ b/modules/nf-core/gawk/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::gawk=5.3.0 diff --git a/modules/nf-core/gawk/main.nf b/modules/nf-core/gawk/main.nf new file mode 100644 index 0000000000..615b2ce923 --- /dev/null +++ b/modules/nf-core/gawk/main.nf @@ -0,0 +1,70 @@ +process GAWK { + tag "$meta.id" + 
label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gawk:5.3.0' : + 'biocontainers/gawk:5.3.0' }" + + input: + tuple val(meta), path(input, arity: '0..*') + path(program_file) + val(disable_redirect_output) + + output: + tuple val(meta), path("*.${suffix}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' // args is used for the main arguments of the tool + def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.collect{ it.getExtension()}.get(0)}" // use the first extension of the input files + + program = program_file ? "-f ${program_file}" : "${args2}" + lst_gz = input.findResults{ it.getExtension().endsWith("gz") ? it.toString() : null } + unzip = lst_gz ? "gunzip -q -f ${lst_gz.join(" ")}" : "" + input_cmd = input.collect { it.toString() - ~/\.gz$/ }.join(" ") + output_cmd = suffix.endsWith("gz") ? "| gzip > ${prefix}.${suffix}" : "> ${prefix}.${suffix}" + output = disable_redirect_output ? "" : output_cmd + cleanup = lst_gz ? "rm ${lst_gz.collect{ it - ~/\.gz$/ }.join(" ")}" : "" + + input.collect{ + assert it.name != "${prefix}.${suffix}" : "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ } + + """ + ${unzip} + + awk \\ + ${args} \\ + ${program} \\ + ${input_cmd} \\ + ${output} + + ${cleanup} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.collect{ it.getExtension()}.get(0)}" // input is always a list (arity '0..*'): use the first file's extension, same as the script block + def create_cmd = suffix.endsWith("gz") ? 
"echo '' | gzip >" : "touch" + + """ + ${create_cmd} ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gawk/meta.yml b/modules/nf-core/gawk/meta.yml new file mode 100644 index 0000000000..34c50b125c --- /dev/null +++ b/modules/nf-core/gawk/meta.yml @@ -0,0 +1,63 @@ +name: "gawk" +description: | + If you are like many computer users, you would frequently like to make changes in various text files + wherever certain patterns appear, or extract data from parts of certain lines while discarding the rest. + The job is easy with awk, especially the GNU implementation gawk. +keywords: + - gawk + - awk + - txt + - text + - file parsing +tools: + - "gawk": + description: "GNU awk" + homepage: "https://www.gnu.org/software/gawk/" + documentation: "https://www.gnu.org/software/gawk/manual/" + tool_dev_url: "https://www.gnu.org/prep/ftp.html" + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: The input file - Specify the logic that needs to be executed on + this file on the `ext.args2` or in the program file. + If the files have a `.gz` extension, they will be unzipped using `gunzip`. + pattern: "*" + - - program_file: + type: file + description: Optional file containing logic for awk to execute. If you don't + wish to use a file, you can use `ext.args2` to specify the logic. + pattern: "*" + - - disable_redirect_output: + type: boolean + description: Disable the redirection of awk output to a given file. This is + useful if you want to use awk's built-in redirect to write files instead + of the shell's redirect. +output: + - output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.${suffix}": + type: file + description: The output file - if using shell redirection, specify the name of this + file using `ext.prefix` and the extension using `ext.suffix`. Otherwise, ensure + the awk program produces files with the extension in `ext.suffix`. + pattern: "*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/goleft/indexcov/environment.yml b/modules/nf-core/goleft/indexcov/environment.yml new file mode 100644 index 0000000000..7aa46cc4f0 --- /dev/null +++ b/modules/nf-core/goleft/indexcov/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::goleft=0.2.4 + - bioconda::htslib=1.12 diff --git a/modules/nf-core/goleft/indexcov/main.nf b/modules/nf-core/goleft/indexcov/main.nf new file mode 100644 index 0000000000..5d0ed5dfb0 --- /dev/null +++ b/modules/nf-core/goleft/indexcov/main.nf @@ -0,0 +1,65 @@ +process GOLEFT_INDEXCOV { // Runs "goleft indexcov" into a ${prefix}/ directory, then tabix-indexes the BED it wrote there (if any) + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/goleft:0.2.4--h9ee0642_1': + 'biocontainers/goleft:0.2.4--h9ee0642_1' }" + + input: + tuple val(meta), path(bams), path(indexes) + tuple val(meta2), path(fai) + + output: + tuple val(meta), path("${prefix}/*") , emit: output + tuple val(meta), path("${prefix}/*ped") , emit: ped , optional: true + tuple val(meta), path("${prefix}/*bed.gz") , emit: bed , optional: true + tuple val(meta), path("${prefix}/*bed.gz.tbi"), emit: bed_index , optional: true + tuple val(meta), path("${prefix}/*roc") , emit: roc , optional: true + tuple val(meta), path("${prefix}/*html") , emit: html, optional: true + tuple val(meta), path("${prefix}/*png") , emit: png , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" // no 'def': the output: globs reference ${prefix} + // indexcov is given BAM files directly, or CRAI indexes instead of CRAMs: keep only *.bam from bams and *.crai from indexes + def input_files = bams.findAll{it.name.endsWith(".bam")} + indexes.findAll{it.name.endsWith(".crai")} + def extranormalize = input_files.any{it.name.endsWith(".crai")} ? 
" --extranormalize " : "" // --extranormalize is added as soon as one CRAI is among the inputs + """ + goleft indexcov \\ + --fai ${fai} \\ + --directory ${prefix} \\ + ${extranormalize} \\ + $args \\ + ${input_files.join(" ")} + + if [ -f "${prefix}/${prefix}-indexcov.bed.gz" ] ; then + tabix -p bed "${prefix}/${prefix}-indexcov.bed.gz" + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + goleft: \$(goleft --version 2>&1 | head -n 1 | sed 's/^.*goleft Version: //') + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + stub: + def args = task.ext.args ?: '' // NOTE(review): args is not used by the stub script + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir "${prefix}" + echo "" | gzip > "${prefix}/${prefix}-indexcov.bed.gz" + touch "${prefix}/${prefix}-indexcov.bed.gz.tbi" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + goleft: \$(goleft --version 2>&1 | head -n 1 | sed 's/^.*goleft Version: //') + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/goleft/indexcov/meta.yml b/modules/nf-core/goleft/indexcov/meta.yml new file mode 100644 index 0000000000..1619caf32d --- /dev/null +++ b/modules/nf-core/goleft/indexcov/meta.yml @@ -0,0 +1,122 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "goleft_indexcov" +description: Quickly estimate coverage from a whole-genome bam or cram index. A bam + index has 16KB resolution so that's what this gives, but it provides what appears + to be a high-quality coverage estimate in seconds per genome. 
+keywords: + - coverage + - cnv + - genomics + - depth +tools: + - "goleft": + description: "goleft is a collection of bioinformatics tools distributed under + MIT license in a single static binary" + homepage: "https://github.com/brentp/goleft" + documentation: "https://github.com/brentp/goleft" + tool_dev_url: "https://github.com/brentp/goleft" + doi: "10.1093/gigascience/gix090" + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false] + - bams: + type: file + description: Sorted BAM/CRAM/SAM files + pattern: "*.{bam,cram,sam}" + - indexes: + type: file + description: BAI/CRAI files + pattern: "*.{bai,crai}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false] + - fai: + type: file + description: FASTA index + pattern: "*.{fai}" +output: + - output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*: + type: file + description: Files generated by indexcov + - ped: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*ped: + type: file + description: ped files + pattern: "*ped" + - bed: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*bed.gz: + type: file + description: bed files + pattern: "*bed.gz" + - bed_index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*bed.gz.tbi: + type: file + description: bed index files + pattern: "*bed.gz.tbi" + - roc: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}/*roc: + type: file + description: roc files + pattern: "*roc" + - html: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*html: + type: file + description: html files + pattern: "*html" + - png: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}/*png: + type: file + description: png files + pattern: "*png" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@lindenb" +maintainers: + - "@lindenb" diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 0000000000..9b926b1ffa --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,12 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::coreutils=9.5 + - conda-forge::grep=3.11 + - conda-forge::gzip=1.13 + - conda-forge::lbzip2=2.5 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf new file mode 100644 index 0000000000..3ffc8e9264 --- /dev/null +++ b/modules/nf-core/gunzip/main.nf @@ -0,0 +1,55 @@ +process GUNZIP { // Decompresses one gzip archive with "gzip -cd" and emits the result as ${gunzip} + tag "${archive}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/52ccce28d2ab928ab862e25aae26314d69c8e38bd41ca9431c67ef05221348aa/data' + : 'community.wave.seqera.io/library/coreutils_grep_gzip_lbzip2_pruned:838ba80435a629f8'}" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("${gunzip}"), emit: gunzip + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def unzipped = archive.toString() - ~/\.gz$/ // strip only a trailing ".gz"; String - String would drop the first ".gz" found anywhere in the name + def dot = unzipped.lastIndexOf('.') // > 0 only when a real extension remains (not for "README" or ".bashrc") + def prefix = task.ext.prefix ?: (dot > 0 ? unzipped.substring(0, dot) : unzipped) + gunzip = prefix + (dot > 0 ? unzipped.substring(dot) : '') // no 'def': the output: block references ${gunzip} + """ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ + ${args} \\ + ${archive} \\ + > ${gunzip} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def unzipped = archive.toString() - ~/\.gz$/ // same output naming as the script: block + def dot = unzipped.lastIndexOf('.') + def prefix = task.ext.prefix ?: (dot > 0 ? unzipped.substring(0, dot) : unzipped) + gunzip = prefix + (dot > 0 ? 
unzipped.substring(dot) : '') + """ + touch ${gunzip} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml new file mode 100644 index 0000000000..69d3102455 --- /dev/null +++ b/modules/nf-core/gunzip/meta.yml @@ -0,0 +1,47 @@ +name: gunzip +description: Decompresses gzip-compressed files. +keywords: + - gunzip + - compression + - decompression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. 
+ documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + - meta: + type: map + description: | + Optional groovy Map containing meta information, e.g. [ id:'test', single_end:false ] + - ${gunzip}: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" + - "@gallvp" diff --git a/modules/nf-core/lofreq/callparallel/environment.yml b/modules/nf-core/lofreq/callparallel/environment.yml new file mode 100644 index 0000000000..4ade529a78 --- /dev/null +++ b/modules/nf-core/lofreq/callparallel/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::lofreq=2.1.5 diff --git a/modules/nf-core/lofreq/callparallel/main.nf b/modules/nf-core/lofreq/callparallel/main.nf new file mode 100644 index 0000000000..93f9a3dfb1 --- /dev/null +++ b/modules/nf-core/lofreq/callparallel/main.nf @@ -0,0 +1,70 @@ +process LOFREQ_CALLPARALLEL { // Calls variants with "lofreq call-parallel"; CRAM input is first converted to a temporary BAM + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/lofreq:2.1.5--py38h588ecb2_4' : + 'biocontainers/lofreq:2.1.5--py38h588ecb2_4' }" + + input: + tuple val(meta) , path(bam), path(bai), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.vcf.gz.tbi"), emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def options_intervals = intervals ? "-l ${intervals}" : "" // pass -l only when an intervals file was provided + + def alignment_cram = bam.extension == "cram" + // CRAM input is converted to <baseName>.bam for lofreq and that copy is removed afterwards; any other input is passed through as-is + def alignment_out = alignment_cram ? bam.baseName + ".bam" : "${bam}" + + def samtools_cram_convert = '' + samtools_cram_convert += alignment_cram ? " samtools view -T ${fasta} ${bam} -@ $task.cpus -o ${alignment_out}\n" : '' + samtools_cram_convert += alignment_cram ? " samtools index ${alignment_out}\n" : '' + + def samtools_cram_remove = '' + samtools_cram_remove += alignment_cram ? " rm ${alignment_out}\n" : '' + samtools_cram_remove += alignment_cram ? 
" rm ${alignment_out}.bai\n " : '' + """ + $samtools_cram_convert + + lofreq \\ + call-parallel \\ + --pp-threads $task.cpus \\ + $args \\ + $options_intervals \\ + -f $fasta \\ + -o ${prefix}.vcf.gz \\ + $alignment_out + + $samtools_cram_remove + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + lofreq: \$(echo \$(lofreq version 2>&1) | sed 's/^version: //; s/ *commit.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + lofreq: \$(echo \$(lofreq version 2>&1) | sed 's/^version: //; s/ *commit.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/lofreq/callparallel/meta.yml b/modules/nf-core/lofreq/callparallel/meta.yml new file mode 100644 index 0000000000..25a33e85c4 --- /dev/null +++ b/modules/nf-core/lofreq/callparallel/meta.yml @@ -0,0 +1,87 @@ +name: lofreq_callparallel +description: It predicts variants using multiple processors +keywords: + - variant calling + - low frequency variant calling + - call + - variants +tools: + - lofreq: + description: Lofreq is a fast and sensitive variant-caller for inferring SNVs + and indels from next-generation sequencing data. Its call-parallel programme + predicts variants using multiple processors + homepage: https://csb5.github.io/lofreq/ + documentation: https://csb5.github.io/lofreq/ + doi: "10.1093/nar/gks918" + licence: ["MIT"] + identifier: biotools:lofreq +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - bam: + type: file + description: Tumor sample sorted BAM/CRAM file + pattern: "*.{bam,cram}" + - bai: + type: file + description: BAM/CRAM index file + pattern: "*.{bai,crai}" + - intervals: + type: file + description: BED file containing target regions for variant calling + pattern: "*.{bed}" + - - meta2: + type: map + description: | + Groovy Map containing sample information about the reference fasta + e.g. [ id:'reference' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fasta}" + - - meta3: + type: map + description: | + Groovy Map containing sample information about the reference fasta fai + e.g. [ id:'reference' ] + - fai: + type: file + description: Reference genome FASTA index file + pattern: "*.{fai}" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz": + type: file + description: Predicted variants file + pattern: "*.{vcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.vcf.gz.tbi": + type: file + description: Index of vcf file + pattern: "*.{vcf.gz.tbi}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kaurravneet4123" + - "@bjohnnyd" +maintainers: + - "@kaurravneet4123" + - "@bjohnnyd" + - "@nevinwu" + - "@AitorPeseta" diff --git a/modules/nf-core/manta/germline/environment.yml b/modules/nf-core/manta/germline/environment.yml new file mode 100644 index 0000000000..3804c07f42 --- /dev/null +++ b/modules/nf-core/manta/germline/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::python=2.7.15 + - bioconda::manta=1.6.0 diff --git a/modules/nf-core/manta/germline/main.nf b/modules/nf-core/manta/germline/main.nf new file mode 100644 index 0000000000..0fad64f31e --- /dev/null +++ b/modules/nf-core/manta/germline/main.nf @@ -0,0 +1,81 @@ +process MANTA_GERMLINE { // Configures Manta for the given --bam inputs, runs its workflow locally, then renames the result VCFs/indexes to ${prefix}.* + tag "$meta.id" + label 'process_medium' + label 'error_retry' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f6/f696c93e6209e33ac0d15f1ecfa799bc67329eec07b0569e065ea8b220b53953/data' : + 'community.wave.seqera.io/library/manta_python:0eb71149179b3920' }" + + input: + // Pairing the target BED with each input sample lets the same sample be run in parallel across different intervals, or against a single BED file + tuple val(meta), path(input), path(index), path(target_bed), path(target_bed_tbi) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + path(config) + + output: + tuple val(meta), path("*candidate_small_indels.vcf.gz") , emit: candidate_small_indels_vcf + tuple val(meta), path("*candidate_small_indels.vcf.gz.tbi"), emit: candidate_small_indels_vcf_tbi + tuple val(meta), path("*candidate_sv.vcf.gz") , emit: candidate_sv_vcf + tuple val(meta), path("*candidate_sv.vcf.gz.tbi") , emit: candidate_sv_vcf_tbi + tuple val(meta), path("*diploid_sv.vcf.gz") , emit: diploid_sv_vcf + tuple val(meta), path("*diploid_sv.vcf.gz.tbi") , emit: diploid_sv_vcf_tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_files = input.collect{"--bam ${it}"}.join(' ') // one --bam flag per staged input file + def options_manta = target_bed ? "--callRegions $target_bed" : "" // --callRegions only when a target BED was provided + def config_option = config ? 
"--config ${config}" : "" // --config only when a configuration file was provided + """ + configManta.py \\ + ${input_files} \\ + ${config_option} \\ + --reference $fasta \\ + --runDir manta \\ + $options_manta \\ + $args + + python manta/runWorkflow.py -m local -j $task.cpus + + mv manta/results/variants/candidateSmallIndels.vcf.gz \\ + ${prefix}.candidate_small_indels.vcf.gz + mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \\ + ${prefix}.candidate_small_indels.vcf.gz.tbi + mv manta/results/variants/candidateSV.vcf.gz \\ + ${prefix}.candidate_sv.vcf.gz + mv manta/results/variants/candidateSV.vcf.gz.tbi \\ + ${prefix}.candidate_sv.vcf.gz.tbi + mv manta/results/variants/diploidSV.vcf.gz \\ + ${prefix}.diploid_sv.vcf.gz + mv manta/results/variants/diploidSV.vcf.gz.tbi \\ + ${prefix}.diploid_sv.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + manta: \$( configManta.py --version ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" // the stub creates placeholders under the same ${prefix}.* names as the real run + """ + echo "" | gzip > ${prefix}.candidate_small_indels.vcf.gz + touch ${prefix}.candidate_small_indels.vcf.gz.tbi + echo "" | gzip > ${prefix}.candidate_sv.vcf.gz + touch ${prefix}.candidate_sv.vcf.gz.tbi + echo "" | gzip > ${prefix}.diploid_sv.vcf.gz + touch ${prefix}.diploid_sv.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + manta: \$( configManta.py --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/manta/germline/meta.yml b/modules/nf-core/manta/germline/meta.yml new file mode 100644 index 0000000000..9774e1a059 --- /dev/null +++ b/modules/nf-core/manta/germline/meta.yml @@ -0,0 +1,154 @@ +name: manta_germline +description: Manta calls structural variants (SVs) and indels from mapped paired-end + sequencing reads. It is optimized for analysis of germline variation in small sets + of individuals and somatic variation in tumor/normal sample pairs. 
+keywords: + - somatic + - wgs + - wxs + - panel + - vcf + - structural variants + - small indels +tools: + - manta: + description: Structural variant and indel caller for mapped sequencing data + homepage: https://github.com/Illumina/manta + documentation: https://github.com/Illumina/manta/blob/v1.6.0/docs/userGuide/README.md + tool_dev_url: https://github.com/Illumina/manta + doi: "10.1093/bioinformatics/btv710" + licence: ["GPL v3"] + identifier: biotools:manta_sv +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file. For joint calling use a list of files. + pattern: "*.{bam,cram,sam}" + ontologies: [] + - index: + type: file + description: BAM/CRAM/SAM index file. For joint calling use a list of files. + pattern: "*.{bai,crai,sai}" + ontologies: [] + - target_bed: + type: file + description: BED file containing target regions for variant calling + pattern: "*.{bed}" + ontologies: [] + - target_bed_tbi: + type: file + description: Index for BED file containing target regions for variant calling + pattern: "*.{bed.tbi}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Genome reference FASTA file + pattern: "*.{fa,fasta}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Genome reference FASTA index file + pattern: "*.{fa.fai,fasta.fai}" + ontologies: [] + - config: + type: file + description: Manta configuration file + pattern: "*.{ini,conf,config}" + ontologies: [] +output: + candidate_small_indels_vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*candidate_small_indels.vcf.gz": + type: file + description: Gzipped VCF file containing variants + pattern: "*.{vcf.gz}" + ontologies: [] + candidate_small_indels_vcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*candidate_small_indels.vcf.gz.tbi": + type: file + description: Index for gzipped VCF file containing variants + pattern: "*.{vcf.gz.tbi}" + ontologies: [] + candidate_sv_vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*candidate_sv.vcf.gz": + type: file + description: Gzipped VCF file containing variants + pattern: "*.{vcf.gz}" + ontologies: [] + candidate_sv_vcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*candidate_sv.vcf.gz.tbi": + type: file + description: Index for gzipped VCF file containing variants + pattern: "*.{vcf.gz.tbi}" + ontologies: [] + diploid_sv_vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*diploid_sv.vcf.gz": + type: file + description: Gzipped VCF file containing variants + pattern: "*.{vcf.gz}" + ontologies: [] + diploid_sv_vcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*diploid_sv.vcf.gz.tbi": + type: file + description: Index for gzipped VCF file containing variants + pattern: "*.{vcf.gz.tbi}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@maxulysse" + - "@ramprasadn" + - "@nvnieuwk" +maintainers: + - "@maxulysse" + - "@ramprasadn" + - "@nvnieuwk" diff --git a/modules/nf-core/manta/somatic/environment.yml b/modules/nf-core/manta/somatic/environment.yml new file mode 100644 index 0000000000..3804c07f42 --- /dev/null +++ b/modules/nf-core/manta/somatic/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::python=2.7.15 + - bioconda::manta=1.6.0 diff --git a/modules/nf-core/manta/somatic/main.nf b/modules/nf-core/manta/somatic/main.nf new file mode 100644 index 0000000000..317e3f484e --- /dev/null +++ b/modules/nf-core/manta/somatic/main.nf @@ -0,0 +1,88 @@ +process MANTA_SOMATIC { // Configures Manta for a tumor/normal pair, runs its workflow locally, then renames the result VCFs/indexes to ${prefix}.* + tag "$meta.id" + label 'process_medium' + label 'error_retry' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f6/f696c93e6209e33ac0d15f1ecfa799bc67329eec07b0569e065ea8b220b53953/data' : + 'community.wave.seqera.io/library/manta_python:0eb71149179b3920' }" + + input: + tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor), path(target_bed), path(target_bed_tbi) // the index inputs and target_bed_tbi are staged only; the script never names them + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + path(config) + + output: + tuple val(meta), path("*.candidate_small_indels.vcf.gz") , emit: candidate_small_indels_vcf + tuple val(meta), path("*.candidate_small_indels.vcf.gz.tbi") , emit: candidate_small_indels_vcf_tbi + tuple val(meta), path("*.candidate_sv.vcf.gz") , emit: candidate_sv_vcf + tuple val(meta), path("*.candidate_sv.vcf.gz.tbi") , emit: candidate_sv_vcf_tbi + tuple val(meta), path("*.diploid_sv.vcf.gz") , emit: diploid_sv_vcf + tuple val(meta), path("*.diploid_sv.vcf.gz.tbi") , emit: diploid_sv_vcf_tbi + tuple val(meta), path("*.somatic_sv.vcf.gz") , emit: somatic_sv_vcf + tuple val(meta), path("*.somatic_sv.vcf.gz.tbi") , emit: somatic_sv_vcf_tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def options_manta = target_bed ? "--callRegions $target_bed" : "" // --callRegions only when a target BED was provided + def config_option = config ? 
"--config ${config}" : "" // --config only when a configuration file was provided + """ + configManta.py \\ + --tumorBam $input_tumor \\ + --normalBam $input_normal \\ + --reference $fasta \\ + ${config_option} \\ + --runDir manta \\ + $options_manta \\ + $args + + python manta/runWorkflow.py -m local -j $task.cpus + + mv manta/results/variants/candidateSmallIndels.vcf.gz \\ + ${prefix}.candidate_small_indels.vcf.gz + mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \\ + ${prefix}.candidate_small_indels.vcf.gz.tbi + mv manta/results/variants/candidateSV.vcf.gz \\ + ${prefix}.candidate_sv.vcf.gz + mv manta/results/variants/candidateSV.vcf.gz.tbi \\ + ${prefix}.candidate_sv.vcf.gz.tbi + mv manta/results/variants/diploidSV.vcf.gz \\ + ${prefix}.diploid_sv.vcf.gz + mv manta/results/variants/diploidSV.vcf.gz.tbi \\ + ${prefix}.diploid_sv.vcf.gz.tbi + mv manta/results/variants/somaticSV.vcf.gz \\ + ${prefix}.somatic_sv.vcf.gz + mv manta/results/variants/somaticSV.vcf.gz.tbi \\ + ${prefix}.somatic_sv.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + manta: \$( configManta.py --version ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" // the stub creates placeholders under the same ${prefix}.* names as the real run + """ + echo "" | gzip > ${prefix}.candidate_small_indels.vcf.gz + touch ${prefix}.candidate_small_indels.vcf.gz.tbi + echo "" | gzip > ${prefix}.candidate_sv.vcf.gz + touch ${prefix}.candidate_sv.vcf.gz.tbi + echo "" | gzip > ${prefix}.diploid_sv.vcf.gz + touch ${prefix}.diploid_sv.vcf.gz.tbi + echo "" | gzip > ${prefix}.somatic_sv.vcf.gz + touch ${prefix}.somatic_sv.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + manta: \$( configManta.py --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/manta/somatic/meta.yml b/modules/nf-core/manta/somatic/meta.yml new file mode 100644 index 0000000000..3d619dd586 --- /dev/null +++ b/modules/nf-core/manta/somatic/meta.yml @@ -0,0 
+1,184 @@ +name: manta_somatic +description: Manta calls structural variants (SVs) and indels from mapped paired-end + sequencing 
reads. It is optimized for analysis of germline variation in small sets + of individuals and somatic variation in tumor/normal sample pairs. +keywords: + - somatic + - wgs + - wxs + - panel + - vcf + - structural variants + - small indels +tools: + - manta: + description: Structural variant and indel caller for mapped sequencing data + homepage: https://github.com/Illumina/manta + documentation: https://github.com/Illumina/manta/blob/v1.6.0/docs/userGuide/README.md + tool_dev_url: https://github.com/Illumina/manta + doi: "10.1093/bioinformatics/btv710" + licence: ["GPL v3"] + identifier: biotools:manta_sv +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_normal: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - input_index_normal: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + - input_tumor: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - input_index_tumor: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + - target_bed: + type: file + description: BED file containing target regions for variant calling + pattern: "*.{bed}" + ontologies: [] + - target_bed_tbi: + type: file + description: Index for BED file containing target regions for variant calling + pattern: "*.{bed.tbi}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Genome reference FASTA file + pattern: "*.{fa,fasta}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fai: + type: file + description: Genome reference FASTA index file + pattern: "*.{fa.fai,fasta.fai}" + ontologies: [] + - config: + type: file + description: Manta configuration file + pattern: "*.{ini,conf,config}" + ontologies: [] +output: + candidate_small_indels_vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.candidate_small_indels.vcf.gz": + type: file + description: Gzipped VCF file containing variants + pattern: "*.{vcf.gz}" + ontologies: [] + candidate_small_indels_vcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.candidate_small_indels.vcf.gz.tbi": + type: file + description: Index for gzipped VCF file containing variants + pattern: "*.{vcf.gz.tbi}" + ontologies: [] + candidate_sv_vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.candidate_sv.vcf.gz": + type: file + description: Gzipped VCF file containing variants + pattern: "*.{vcf.gz}" + ontologies: [] + candidate_sv_vcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.candidate_sv.vcf.gz.tbi": + type: file + description: Index for gzipped VCF file containing variants + pattern: "*.{vcf.gz.tbi}" + ontologies: [] + diploid_sv_vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.diploid_sv.vcf.gz": + type: file + description: Gzipped VCF file containing variants + pattern: "*.{vcf.gz}" + ontologies: [] + diploid_sv_vcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.diploid_sv.vcf.gz.tbi": + type: file + description: Index for gzipped VCF file containing variants + pattern: "*.{vcf.gz.tbi}" + ontologies: [] + somatic_sv_vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.somatic_sv.vcf.gz": + type: file + description: Gzipped VCF file containing variants + pattern: "*.{vcf.gz}" + ontologies: [] + somatic_sv_vcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.somatic_sv.vcf.gz.tbi": + type: file + description: Index for gzipped VCF file containing variants + pattern: "*.{vcf.gz.tbi}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@FriederikeHanssen" + - "@nvnieuwk" +maintainers: + - "@FriederikeHanssen" + - "@nvnieuwk" diff --git a/modules/nf-core/manta/tumoronly/environment.yml b/modules/nf-core/manta/tumoronly/environment.yml new file mode 100644 index 0000000000..3804c07f42 --- /dev/null +++ b/modules/nf-core/manta/tumoronly/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::python=2.7.15 + - bioconda::manta=1.6.0 diff --git a/modules/nf-core/modules/manta/tumoronly/main.nf b/modules/nf-core/manta/tumoronly/main.nf similarity index 50% rename from modules/nf-core/modules/manta/tumoronly/main.nf rename to modules/nf-core/manta/tumoronly/main.nf index b71229eccb..b657468b58 100644 --- a/modules/nf-core/modules/manta/tumoronly/main.nf +++ b/modules/nf-core/manta/tumoronly/main.nf @@ -1,16 +1,18 @@ process MANTA_TUMORONLY { tag "$meta.id" - label 'process_high' + label 
'process_medium' + label 'error_retry' - conda (params.enable_conda ? "bioconda::manta=1.6.0" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/manta:1.6.0--h9ee0642_1' : - 'quay.io/biocontainers/manta:1.6.0--h9ee0642_1' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f6/f696c93e6209e33ac0d15f1ecfa799bc67329eec07b0569e065ea8b220b53953/data' : + 'community.wave.seqera.io/library/manta_python:0eb71149179b3920' }" input: tuple val(meta), path(input), path(input_index), path(target_bed), path(target_bed_tbi) - path fasta - path fai + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + path(config) output: tuple val(meta), path("*candidate_small_indels.vcf.gz") , emit: candidate_small_indels_vcf @@ -21,30 +23,36 @@ process MANTA_TUMORONLY { tuple val(meta), path("*tumor_sv.vcf.gz.tbi") , emit: tumor_sv_vcf_tbi path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def options_manta = target_bed ? "--callRegions $target_bed" : "" + def config_option = config ? 
"--config ${config}" : "" """ - configManta.py \ - --tumorBam $input \ - --reference $fasta \ - $options_manta \ - --runDir manta + configManta.py \\ + --tumorBam $input \\ + --reference $fasta \\ + ${config_option} \\ + --runDir manta \\ + $options_manta \\ + $args python manta/runWorkflow.py -m local -j $task.cpus - mv manta/results/variants/candidateSmallIndels.vcf.gz \ + mv manta/results/variants/candidateSmallIndels.vcf.gz \\ ${prefix}.candidate_small_indels.vcf.gz - mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \ + mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \\ ${prefix}.candidate_small_indels.vcf.gz.tbi - mv manta/results/variants/candidateSV.vcf.gz \ + mv manta/results/variants/candidateSV.vcf.gz \\ ${prefix}.candidate_sv.vcf.gz - mv manta/results/variants/candidateSV.vcf.gz.tbi \ + mv manta/results/variants/candidateSV.vcf.gz.tbi \\ ${prefix}.candidate_sv.vcf.gz.tbi - mv manta/results/variants/tumorSV.vcf.gz \ + mv manta/results/variants/tumorSV.vcf.gz \\ ${prefix}.tumor_sv.vcf.gz - mv manta/results/variants/tumorSV.vcf.gz.tbi \ + mv manta/results/variants/tumorSV.vcf.gz.tbi \\ ${prefix}.tumor_sv.vcf.gz.tbi cat <<-END_VERSIONS > versions.yml @@ -52,4 +60,20 @@ process MANTA_TUMORONLY { manta: \$( configManta.py --version ) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.candidate_small_indels.vcf.gz + touch ${prefix}.candidate_small_indels.vcf.gz.tbi + echo "" | gzip > ${prefix}.candidate_sv.vcf.gz + touch ${prefix}.candidate_sv.vcf.gz.tbi + echo "" | gzip > ${prefix}.tumor_sv.vcf.gz + touch ${prefix}.tumor_sv.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + manta: \$( configManta.py --version ) + END_VERSIONS + """ } diff --git a/modules/nf-core/manta/tumoronly/meta.yml b/modules/nf-core/manta/tumoronly/meta.yml new file mode 100644 index 0000000000..0773760b5a --- /dev/null +++ b/modules/nf-core/manta/tumoronly/meta.yml @@ -0,0 +1,152 @@ +name: 
manta_tumoronly +description: Manta calls structural variants (SVs) and indels from mapped paired-end + sequencing reads. It is optimized for analysis of germline variation in small sets + of individuals and somatic variation in tumor/normal sample pairs. +keywords: + - somatic + - wgs + - wxs + - panel + - vcf + - structural variants + - small indels +tools: + - manta: + description: Structural variant and indel caller for mapped sequencing data + homepage: https://github.com/Illumina/manta + documentation: https://github.com/Illumina/manta/blob/v1.6.0/docs/userGuide/README.md + tool_dev_url: https://github.com/Illumina/manta + doi: "10.1093/bioinformatics/btv710" + licence: ["GPL v3"] + identifier: biotools:manta_sv +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - input_index: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + - target_bed: + type: file + description: BED file containing target regions for variant calling + pattern: "*.{bed}" + ontologies: [] + - target_bed_tbi: + type: file + description: Index for BED file containing target regions for variant calling + pattern: "*.{bed.tbi}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Genome reference FASTA file + pattern: "*.{fa,fasta}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fai: + type: file + description: Genome reference FASTA index file + pattern: "*.{fa.fai,fasta.fai}" + ontologies: [] + - config: + type: file + description: Manta configuration file + pattern: "*.{ini,conf,config}" + ontologies: [] +output: + candidate_small_indels_vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*candidate_small_indels.vcf.gz": + type: file + description: Gzipped VCF file containing variants + pattern: "*.{vcf.gz}" + ontologies: [] + candidate_small_indels_vcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*candidate_small_indels.vcf.gz.tbi": + type: file + description: Index for gzipped VCF file containing variants + pattern: "*.{vcf.gz.tbi}" + ontologies: [] + candidate_sv_vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*candidate_sv.vcf.gz": + type: file + description: Gzipped VCF file containing variants + pattern: "*.{vcf.gz}" + ontologies: [] + candidate_sv_vcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*candidate_sv.vcf.gz.tbi": + type: file + description: Index for gzipped VCF file containing variants + pattern: "*.{vcf.gz.tbi}" + ontologies: [] + tumor_sv_vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*tumor_sv.vcf.gz": + type: file + description: Gzipped VCF file containing variants + pattern: "*.{vcf.gz}" + ontologies: [] + tumor_sv_vcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*tumor_sv.vcf.gz.tbi": + type: file + description: Index for gzipped VCF file containing variants + pattern: "*.{vcf.gz.tbi}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@maxulysse" + - "@nvnieuwk" +maintainers: + - "@maxulysse" + - "@nvnieuwk" diff --git a/modules/nf-core/modules/bcftools/stats/main.nf b/modules/nf-core/modules/bcftools/stats/main.nf deleted file mode 100644 index 7e150d1fe1..0000000000 --- a/modules/nf-core/modules/bcftools/stats/main.nf +++ /dev/null @@ -1,30 +0,0 @@ -process BCFTOOLS_STATS { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::bcftools=1.14' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0' : - 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" - - input: - tuple val(meta), path(vcf) - - output: - tuple val(meta), path("*stats.txt"), emit: stats - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - bcftools stats $args $vcf > ${prefix}.bcftools_stats.txt - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/bcftools/stats/meta.yml b/modules/nf-core/modules/bcftools/stats/meta.yml deleted file mode 100644 index 505bf72932..0000000000 --- a/modules/nf-core/modules/bcftools/stats/meta.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: bcftools_stats -description: Generates stats from VCF files -keywords: - - variant calling - - stats - - VCF -tools: - - stats: - description: | - 
Parses VCF or BCF and produces text file stats which is suitable for - machine processing and can be plotted using plot-vcfstats. - homepage: http://samtools.github.io/bcftools/bcftools.html - documentation: http://www.htslib.org/doc/bcftools.html - doi: 10.1093/bioinformatics/btp352 - licence: ['MIT'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - vcf: - type: file - description: VCF input file - pattern: "*.{vcf}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - stats: - type: file - description: Text output file containing stats - pattern: "*_{stats.txt}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@joseespinosa" - - "@drpatelh" diff --git a/modules/nf-core/modules/bwa/index/main.nf b/modules/nf-core/modules/bwa/index/main.nf deleted file mode 100644 index 3affbf16bc..0000000000 --- a/modules/nf-core/modules/bwa/index/main.nf +++ /dev/null @@ -1,35 +0,0 @@ -process BWA_INDEX { - tag "$fasta" - label 'process_high' - - conda (params.enable_conda ? "bioconda::bwa=0.7.17" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : - 'quay.io/biocontainers/bwa:0.7.17--hed695b0_7' }" - - input: - path fasta - - output: - path "bwa" , emit: index - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - mkdir bwa - bwa \\ - index \\ - $args \\ - -p bwa/${fasta.baseName} \\ - $fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/bwa/index/meta.yml b/modules/nf-core/modules/bwa/index/meta.yml deleted file mode 100644 index 11d62df374..0000000000 --- a/modules/nf-core/modules/bwa/index/meta.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: bwa_index -description: Create BWA index for reference genome -keywords: - - index - - fasta - - genome - - reference -tools: - - bwa: - description: | - BWA is a software package for mapping DNA sequences against - a large reference genome, such as the human genome. - homepage: http://bio-bwa.sourceforge.net/ - documentation: http://www.htslib.org/doc/samtools.html - arxiv: arXiv:1303.3997 - licence: ['GPL-3.0-or-later'] -input: - - fasta: - type: file - description: Input genome fasta file -output: - - index: - type: file - description: BWA genome index files - pattern: "*.{amb,ann,bwt,pac,sa}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@maxulysse" diff --git a/modules/nf-core/modules/bwa/mem/main.nf b/modules/nf-core/modules/bwa/mem/main.nf deleted file mode 100644 index 9a91c77f2d..0000000000 --- a/modules/nf-core/modules/bwa/mem/main.nf +++ /dev/null @@ -1,45 +0,0 @@ -process BWA_MEM { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? 
"bioconda::bwa=0.7.17 bioconda::samtools=1.12" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:66ed1b38d280722529bb8a0167b0cf02f8a0b488-0' : - 'quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:66ed1b38d280722529bb8a0167b0cf02f8a0b488-0' }" - - input: - tuple val(meta), path(reads) - path index - val sort_bam - - output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def read_group = meta.read_group ? "-R ${meta.read_group}" : "" - def samtools_command = sort_bam ? 'sort' : 'view' - """ - INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` - - bwa mem \\ - $args \\ - $read_group \\ - -t $task.cpus \\ - \$INDEX \\ - $reads \\ - | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/bwa/mem/meta.yml b/modules/nf-core/modules/bwa/mem/meta.yml deleted file mode 100644 index c7c28f1957..0000000000 --- a/modules/nf-core/modules/bwa/mem/meta.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: bwa_mem -description: Performs fastq alignment to a fasta reference using BWA -keywords: - - mem - - bwa - - alignment - - map - - fastq - - bam - - sam -tools: - - bwa: - description: | - BWA is a software package for mapping DNA sequences against - a large reference genome, such as the human genome. 
- homepage: http://bio-bwa.sourceforge.net/ - documentation: http://www.htslib.org/doc/samtools.html - arxiv: arXiv:1303.3997 - licence: ['GPL-3.0-or-later'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. - - index: - type: file - description: BWA genome index files - pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" - - sort_bam: - type: boolean - description: use samtools sort (true) or samtools view (false) - pattern: "true or false" -output: - - bam: - type: file - description: Output BAM file containing read alignments - pattern: "*.{bam}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@jeremy1805" diff --git a/modules/nf-core/modules/bwamem2/index/main.nf b/modules/nf-core/modules/bwamem2/index/main.nf deleted file mode 100644 index 0e9cc2f8ba..0000000000 --- a/modules/nf-core/modules/bwamem2/index/main.nf +++ /dev/null @@ -1,34 +0,0 @@ -process BWAMEM2_INDEX { - tag "$fasta" - label 'process_high' - - conda (params.enable_conda ? "bioconda::bwa-mem2=2.2.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bwa-mem2:2.2.1--he513fc3_0' : - 'quay.io/biocontainers/bwa-mem2:2.2.1--he513fc3_0' }" - - input: - path fasta - - output: - path "bwamem2" , emit: index - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - mkdir bwamem2 - bwa-mem2 \\ - index \\ - $args \\ - $fasta -p bwamem2/${fasta} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/bwamem2/index/meta.yml b/modules/nf-core/modules/bwamem2/index/meta.yml deleted file mode 100644 index e0f6014cd7..0000000000 --- a/modules/nf-core/modules/bwamem2/index/meta.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: bwamem2_index -description: Create BWA-mem2 index for reference genome -keywords: - - index - - fasta - - genome - - reference -tools: - - bwa: - description: | - BWA-mem2 is a software package for mapping DNA sequences against - a large reference genome, such as the human genome. - homepage: https://github.com/bwa-mem2/bwa-mem2 - documentation: https://github.com/bwa-mem2/bwa-mem2#usage - licence: ['MIT'] -input: - - fasta: - type: file - description: Input genome fasta file -output: - - index: - type: file - description: BWA genome index files - pattern: "*.{0132,amb,ann,bwt.2bit.64,pac}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@maxulysse" diff --git a/modules/nf-core/modules/bwamem2/mem/main.nf b/modules/nf-core/modules/bwamem2/mem/main.nf deleted file mode 100644 index 56f595ec6c..0000000000 --- a/modules/nf-core/modules/bwamem2/mem/main.nf +++ /dev/null @@ -1,46 +0,0 @@ -process BWAMEM2_MEM { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? 
"bioconda::bwa-mem2=2.2.1 bioconda::samtools=1.12" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:cf603b12db30ec91daa04ba45a8ee0f35bbcd1e2-0' : - 'quay.io/biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:cf603b12db30ec91daa04ba45a8ee0f35bbcd1e2-0' }" - - input: - tuple val(meta), path(reads) - path index - val sort_bam - - output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def read_group = meta.read_group ? "-R ${meta.read_group}" : "" - def samtools_command = sort_bam ? 'sort' : 'view' - """ - INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` - - bwa-mem2 \\ - mem \\ - $args \\ - $read_group \\ - -t $task.cpus \\ - \$INDEX \\ - $reads \\ - | samtools $samtools_command $args2 -@ $task.cpus -o ${prefix}.bam - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/bwamem2/mem/meta.yml b/modules/nf-core/modules/bwamem2/mem/meta.yml deleted file mode 100644 index 71e8375968..0000000000 --- a/modules/nf-core/modules/bwamem2/mem/meta.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: bwamem2_mem -description: Performs fastq alignment to a fasta reference using BWA -keywords: - - mem - - bwa - - alignment - - map - - fastq - - bam - - sam -tools: - - bwa: - description: | - BWA-mem2 is a software package for mapping DNA sequences against - a large reference genome, such as the human genome. 
- homepage: https://github.com/bwa-mem2/bwa-mem2 - documentation: http://www.htslib.org/doc/samtools.html - arxiv: arXiv:1303.3997 - licence: ['MIT'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. - - index: - type: file - description: BWA genome index files - pattern: "Directory containing BWA index *.{0132,amb,ann,bwt.2bit.64,pac}" - - sort_bam: - type: boolean - description: use samtools sort (true) or samtools view (false) - pattern: "true or false" -output: - - bam: - type: file - description: Output BAM file containing read alignments - pattern: "*.{bam}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@maxulysse" diff --git a/modules/nf-core/modules/cat/fastq/main.nf b/modules/nf-core/modules/cat/fastq/main.nf deleted file mode 100644 index bf0877c3e8..0000000000 --- a/modules/nf-core/modules/cat/fastq/main.nf +++ /dev/null @@ -1,51 +0,0 @@ -process CAT_FASTQ { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" - - input: - tuple val(meta), path(reads, stageAs: "input*/*") - - output: - tuple val(meta), path("*.merged.fastq.gz"), emit: reads - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def readList = reads.collect{ it.toString() } - if (meta.single_end) { - if (readList.size > 1) { - """ - cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') - END_VERSIONS - """ - } - } else { - if (readList.size > 2) { - def read1 = [] - def read2 = [] - readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v } - """ - cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz - cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') - END_VERSIONS - """ - } - } -} diff --git a/modules/nf-core/modules/cat/fastq/meta.yml b/modules/nf-core/modules/cat/fastq/meta.yml deleted file mode 100644 index 1992fa34ea..0000000000 --- a/modules/nf-core/modules/cat/fastq/meta.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: cat_fastq -description: Concatenates fastq files -keywords: - - fastq - - concatenate -tools: - - cat: - description: | - The cat utility reads files sequentially, writing them to the standard output. - documentation: https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html - licence: ['GPL-3.0-or-later'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - reads: - type: list - description: | - List of input FastQ files to be concatenated. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: Merged fastq file - pattern: "*.{merged.fastq.gz}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@joseespinosa" - - "@drpatelh" diff --git a/modules/nf-core/modules/cnvkit/batch/main.nf b/modules/nf-core/modules/cnvkit/batch/main.nf deleted file mode 100644 index 7c44d9f614..0000000000 --- a/modules/nf-core/modules/cnvkit/batch/main.nf +++ /dev/null @@ -1,55 +0,0 @@ -process CNVKIT_BATCH { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? 'bioconda::cnvkit=0.9.9' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.9--pyhdfd78af_0' : - 'quay.io/biocontainers/cnvkit:0.9.9--pyhdfd78af_0' }" - - input: - tuple val(meta), path(tumor), path(normal) - path fasta - path targets - path reference - - output: - tuple val(meta), path("*.bed"), emit: bed - tuple val(meta), path("*.cnn"), emit: cnn, optional: true - tuple val(meta), path("*.cnr"), emit: cnr, optional: true - tuple val(meta), path("*.cns"), emit: cns, optional: true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def normal_args = normal ? "--normal $normal" : "" - def fasta_args = fasta ? "--fasta $fasta" : "" - def reference_args = reference ? "--reference $reference" : "" - - def target_args = "" - if (args.contains("--method wgs") || args.contains("-m wgs")) { - target_args = targets ? 
"--targets $targets" : "" - } - else { - target_args = "--targets $targets" - } - """ - cnvkit.py \\ - batch \\ - $tumor \\ - $normal_args \\ - $fasta_args \\ - $reference_args \\ - $target_args \\ - --processes $task.cpus \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/cnvkit/batch/meta.yml b/modules/nf-core/modules/cnvkit/batch/meta.yml deleted file mode 100644 index 0d263041c4..0000000000 --- a/modules/nf-core/modules/cnvkit/batch/meta.yml +++ /dev/null @@ -1,93 +0,0 @@ -name: cnvkit_batch -description: Copy number variant detection from high-throughput sequencing data -keywords: - - bam - - fasta - - copy number -tools: - - cnvkit: - description: | - CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent. - homepage: https://cnvkit.readthedocs.io/en/stable/index.html - documentation: https://cnvkit.readthedocs.io/en/stable/index.html - licence: ['Apache-2.0'] -params: - - outdir: - type: string - description: | - The pipeline's output directory. By default, the module will - output files into `$params.outdir/` - - publish_dir_mode: - type: string - description: | - Value for the Nextflow `publishDir` mode parameter. - Available: symlink, rellink, link, copy, copyNoFollow, move. - - enable_conda: - type: boolean - description: | - Run the module with Conda using the software specified - via the `conda` directive - - singularity_pull_docker_container: - type: boolean - description: | - Instead of directly downloading Singularity images for use with Singularity, - force the workflow to pull and convert Docker containers instead. 
-input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - tumour: - type: file - description: | - Input tumour sample bam file (or cram) - - normal: - type: file - description: | - Input normal sample bam file (or cram) - - fasta: - type: file - description: | - Input reference genome fasta file - - targetfile: - type: file - description: | - Input target bed file - - reference: - type: file - description: | - Input reference cnn-file (only for germline and tumor-only running) -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bed: - type: file - description: File containing genomic regions - pattern: "*.{bed}" - - cnn: - type: file - description: File containing coverage information - pattern: "*.{cnn}" - - cnr: - type: file - description: File containing copy number ratio information - pattern: "*.{cnr}" - - cns: - type: file - description: File containing copy number segment information - pattern: "*.{cns}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@kaurravneet4123" - - "@KevinMenden" - - "@MaxUlysse" - - "@drpatelh" - - "@fbdtemme" - - "@lassefolkersen" diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf deleted file mode 100644 index 327d510056..0000000000 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_low' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? "bioconda::multiqc=1.11" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" - - input: - path versions - - output: - path "software_versions.yml" , emit: yml - path "software_versions_mqc.yml", emit: mqc_yml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - template 'dumpsoftwareversions.py' -} diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml deleted file mode 100644 index 5b5b8a6026..0000000000 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template -keywords: - - custom - - version -tools: - - custom: - description: Custom module used to dump software versions within the nf-core pipeline template - homepage: https://github.com/nf-core/tools - documentation: https://github.com/nf-core/tools - licence: ['MIT'] -input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" - -output: - - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" - - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@drpatelh" - - "@grst" diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py deleted file mode 100644 index d139039254..0000000000 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ /dev/null @@ 
-1,89 +0,0 @@ -#!/usr/bin/env python - -import yaml -import platform -from textwrap import dedent - - -def _make_versions_html(versions): - html = [ - dedent( - """\\ - - - - - - - - - - """ - ) - ] - for process, tmp_versions in sorted(versions.items()): - html.append("") - for i, (tool, version) in enumerate(sorted(tmp_versions.items())): - html.append( - dedent( - f"""\\ - - - - - - """ - ) - ) - html.append("") - html.append("
Process Name Software Version
{process if (i == 0) else ''}{tool}{version}
") - return "\\n".join(html) - - -versions_this_module = {} -versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, -} - -with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - -# aggregate versions by the module name (derived from fully-qualified process name) -versions_by_module = {} -for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - assert versions_by_module[module] == process_versions, ( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - -versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", -} - -versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), -} - -with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) -with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - -with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) diff --git a/modules/nf-core/modules/deepvariant/main.nf b/modules/nf-core/modules/deepvariant/main.nf deleted file mode 100644 index c5e819973c..0000000000 --- a/modules/nf-core/modules/deepvariant/main.nf +++ /dev/null @@ -1,47 +0,0 @@ -process DEEPVARIANT { - tag "$meta.id" - label 'process_medium' - - - if (params.enable_conda) { - exit 1, "Conda environments cannot be 
used when using the DeepVariant tool. Please use docker or singularity containers." - } - - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'google/deepvariant:1.3.0' : - 'google/deepvariant:1.3.0' }" - - input: - tuple val(meta), path(input), path(index), path(intervals) - path(fasta) - path(fai) - - output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - tuple val(meta), path("*g.vcf.gz"), emit: gvcf - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def regions = intervals ? "--regions ${intervals}" : "" - - """ - /opt/deepvariant/bin/run_deepvariant \\ - --ref=${fasta} \\ - --reads=${input} \\ - --output_vcf=${prefix}.vcf.gz \\ - --output_gvcf=${prefix}.g.vcf.gz \\ - ${args} \\ - ${regions} \\ - --num_shards=${task.cpus} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - deepvariant: \$(echo \$(/opt/deepvariant/bin/run_deepvariant --version) | sed 's/^.*version //; s/ .*\$//' ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/deepvariant/meta.yml b/modules/nf-core/modules/deepvariant/meta.yml deleted file mode 100644 index d4423d699a..0000000000 --- a/modules/nf-core/modules/deepvariant/meta.yml +++ /dev/null @@ -1,62 +0,0 @@ -name: deepvariant -description: DeepVariant is an analysis pipeline that uses a deep neural network to call genetic variants from next-generation DNA sequencing data -keywords: - - variant calling - - machine learning -tools: - - deepvariant: - description: DeepVariant is an analysis pipeline that uses a deep neural network to call genetic variants from next-generation DNA sequencing data - homepage: https://github.com/google/deepvariant - documentation: https://github.com/google/deepvariant - tool_dev_url: https://github.com/google/deepvariant - doi: "https://doi.org/10.1038/nbt.4235" - licence: ['BSD-3-clause'] - -input: - 
- meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM file - pattern: "*.bam/cram" - - index: - type: file - description: Index of BAM/CRAM file - pattern: "*.bai/crai" - - interval: - type: file - description: Interval file for targeted regions - pattern: "*.bed" - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - fai: - type: file - description: Index of reference fasta file - pattern: "*.fai" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - vcf: - type: file - description: Compressed VCF file - pattern: "*.vcf.gz" - - gvcf: - type: file - description: Compressed GVCF file - pattern: "*.g.vcf.gz" - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - -authors: - - "@abhi18av" diff --git a/modules/nf-core/modules/ensemblvep/Dockerfile b/modules/nf-core/modules/ensemblvep/Dockerfile deleted file mode 100644 index ac1b469117..0000000000 --- a/modules/nf-core/modules/ensemblvep/Dockerfile +++ /dev/null @@ -1,30 +0,0 @@ -FROM nfcore/base:1.14 -LABEL \ - author="Maxime Garcia" \ - description="VEP image for nf-core pipelines" \ - maintainer="maxime.garcia@scilifelab.se" - -# Install the conda environment -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a - -# Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-vep-104.3/bin:$PATH - -# Setup default ARG variables -ARG GENOME=GRCh38 -ARG SPECIES=homo_sapiens -ARG VEP_VERSION=99 - -# Download Genome -RUN vep_install \ - -a c \ - -c .vep \ - -s ${SPECIES} \ - -y ${GENOME} \ - --CACHE_VERSION ${VEP_VERSION} \ - --CONVERT \ - --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE - -# Dump the details of the installed packages to a file for posterity -RUN conda env 
export --name nf-core-vep-104.3 > nf-core-vep-104.3.yml diff --git a/modules/nf-core/modules/ensemblvep/build.sh b/modules/nf-core/modules/ensemblvep/build.sh deleted file mode 100644 index 5fcb91dfe7..0000000000 --- a/modules/nf-core/modules/ensemblvep/build.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Build and push all containers - -build_push() { - GENOME=$1 - SPECIES=$2 - VEP_VERSION=$3 - VEP_TAG=$4 - - docker build \ - -t nfcore/vep:${VEP_TAG}.${GENOME} \ - software/vep/. \ - --build-arg GENOME=${GENOME} \ - --build-arg SPECIES=${SPECIES} \ - --build-arg VEP_VERSION=${VEP_VERSION} - - docker push nfcore/vep:${VEP_TAG}.${GENOME} -} - -build_push "GRCh37" "homo_sapiens" "104" "104.3" -build_push "GRCh38" "homo_sapiens" "104" "104.3" -build_push "GRCm38" "mus_musculus" "102" "104.3" -build_push "GRCm39" "mus_musculus" "104" "104.3" -build_push "CanFam3.1" "canis_lupus_familiaris" "104" "104.3" -build_push "WBcel235" "caenorhabditis_elegans" "104" "104.3" diff --git a/modules/nf-core/modules/ensemblvep/environment.yml b/modules/nf-core/modules/ensemblvep/environment.yml deleted file mode 100644 index c0731c26d8..0000000000 --- a/modules/nf-core/modules/ensemblvep/environment.yml +++ /dev/null @@ -1,10 +0,0 @@ -# You can use this file to create a conda environment for this module: -# conda env create -f environment.yml -name: nf-core-vep-104.3 -channels: - - conda-forge - - bioconda - - defaults - -dependencies: - - bioconda::ensembl-vep=104.3 diff --git a/modules/nf-core/modules/ensemblvep/main.nf b/modules/nf-core/modules/ensemblvep/main.nf deleted file mode 100644 index c2bd055fa2..0000000000 --- a/modules/nf-core/modules/ensemblvep/main.nf +++ /dev/null @@ -1,52 +0,0 @@ -process ENSEMBLVEP { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::ensembl-vep=104.3" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ensembl-vep:104.3--pl5262h4a94de4_0' : - 'quay.io/biocontainers/ensembl-vep:104.3--pl5262h4a94de4_0' }" - - input: - tuple val(meta), path(vcf) - val genome - val species - val cache_version - path cache - - output: - tuple val(meta), path("*.ann.vcf"), emit: vcf - path "*.summary.html" , emit: report - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" - """ - mkdir $prefix - - vep \\ - -i $vcf \\ - -o ${prefix}.ann.vcf \\ - $args \\ - --assembly $genome \\ - --species $species \\ - --cache \\ - --cache_version $cache_version \\ - --dir_cache $dir_cache \\ - --fork $task.cpus \\ - --vcf \\ - --stats_file ${prefix}.summary.html - - rm -rf $prefix - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/ensemblvep/meta.yml b/modules/nf-core/modules/ensemblvep/meta.yml deleted file mode 100644 index 1b8192279a..0000000000 --- a/modules/nf-core/modules/ensemblvep/meta.yml +++ /dev/null @@ -1,65 +0,0 @@ -name: ENSEMBLVEP -description: Ensembl Variant Effect Predictor (VEP) -keywords: - - annotation -tools: - - ensemblvep: - description: | - VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs - or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. 
- homepage: https://www.ensembl.org/info/docs/tools/vep/index.html - documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html - licence: ['Apache-2.0'] -params: - - use_cache: - type: boolean - description: | - Enable the usage of containers with cache - Does not work with conda - - vep_tag: - type: value - description: | - Specify the tag for the container - https://hub.docker.com/r/nfcore/vep/tags -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - vcf: - type: file - description: | - vcf to annotate - - genome: - type: value - description: | - which genome to annotate with - - species: - type: value - description: | - which species to annotate with - - cache_version: - type: value - description: | - which version of the cache to annotate with - - cache: - type: file - description: | - path to VEP cache (optional) -output: - - vcf: - type: file - description: | - annotated vcf - pattern: "*.ann.vcf" - - report: - type: file - description: VEP report file - pattern: "*.html" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@maxulysse" diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/modules/fastqc/main.nf deleted file mode 100644 index ed6b8c50b1..0000000000 --- a/modules/nf-core/modules/fastqc/main.nf +++ /dev/null @@ -1,47 +0,0 @@ -process FASTQC { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Add soft-links to original FastQs for consistent naming in pipeline - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - fastqc $args --threads $task.cpus ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } -} diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/modules/fastqc/meta.yml deleted file mode 100644 index b09553a3c3..0000000000 --- a/modules/nf-core/modules/fastqc/meta.yml +++ /dev/null @@ -1,52 +0,0 @@ -name: fastqc -description: Run FastQC on sequenced reads -keywords: - - quality control - - qc - - adapters - - fastq -tools: - - fastqc: - description: | - FastQC gives general quality metrics about your reads. - It provides information about the quality score distribution - across your reads, the per base sequence content (%A/C/G/T). - You get information about adapter contamination and other - overrepresented sequences. 
- homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ - documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ - licence: ['GPL-2.0-only'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" - - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" diff --git a/modules/nf-core/modules/fgbio/callmolecularconsensusreads/main.nf b/modules/nf-core/modules/fgbio/callmolecularconsensusreads/main.nf deleted file mode 100644 index 3aab935be0..0000000000 --- a/modules/nf-core/modules/fgbio/callmolecularconsensusreads/main.nf +++ /dev/null @@ -1,32 +0,0 @@ -process FGBIO_CALLMOLECULARCONSENSUSREADS { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::fgbio=1.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fgbio:1.3.0--0' : - 'quay.io/biocontainers/fgbio:1.3.0--0' }" - - input: - tuple val(meta), path(bam) - - output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - fgbio \\ - CallMolecularConsensusReads \\ - -i $bam \\ - $args \\ - -o ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/fgbio/callmolecularconsensusreads/meta.yml b/modules/nf-core/modules/fgbio/callmolecularconsensusreads/meta.yml deleted file mode 100644 index 523f32145a..0000000000 --- a/modules/nf-core/modules/fgbio/callmolecularconsensusreads/meta.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: fgbio_callmolecularconsensusreads -description: Calls consensus sequences from reads with the same unique molecular tag. - -keywords: - - UMIs - - consensus sequence - - bam - - sam -tools: - - fgbio: - description: Tools for working with genomic and high throughput sequencing data. - homepage: https://github.com/fulcrumgenomics/fgbio - documentation: http://fulcrumgenomics.github.io/fgbio/ - licence: ['MIT'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false, collapse:false ] - - bam: - type: file - description: | - The input SAM or BAM file. - pattern: "*.{bam,sam}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: | - Output SAM or BAM file to write consensus reads. 
- pattern: "*.{bam,sam}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@sruthipsuresh" diff --git a/modules/nf-core/modules/fgbio/fastqtobam/main.nf b/modules/nf-core/modules/fgbio/fastqtobam/main.nf deleted file mode 100644 index 3060d7153f..0000000000 --- a/modules/nf-core/modules/fgbio/fastqtobam/main.nf +++ /dev/null @@ -1,39 +0,0 @@ -process FGBIO_FASTQTOBAM { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::fgbio=1.4.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fgbio:1.4.0--hdfd78af_0' : - 'quay.io/biocontainers/fgbio:1.4.0--hdfd78af_0' }" - - input: - tuple val(meta), path(reads) - val read_structure - - output: - tuple val(meta), path("*_umi_converted.bam"), emit: umibam - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - mkdir tmp - - fgbio \\ - --tmp-dir=${PWD}/tmp \\ - FastqToBam \\ - -i $reads \\ - -o "${prefix}_umi_converted.bam" \\ - --read-structures $read_structure \\ - --sample $meta.id \\ - --library $meta.id \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/fgbio/fastqtobam/meta.yml b/modules/nf-core/modules/fgbio/fastqtobam/meta.yml deleted file mode 100644 index e356d315a6..0000000000 --- a/modules/nf-core/modules/fgbio/fastqtobam/meta.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: fgbio_fastqtobam -description: | - Using the FGBIO tools, converts FASTQ files sequenced with UMIs into BAM files, moving the UMI barcode into the RX field of the BAM file -keywords: - - fastqtobam - - fgbio -tools: - - fgbio: - description: A set of tools for working with genomic 
and high throughput sequencing data, including UMIs - homepage: http://fulcrumgenomics.github.io/fgbio/ - documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/ - tool_dev_url: https://github.com/fulcrumgenomics/fgbio - doi: "" - licence: ['MIT'] - -input: - - reads: - type: file - description: pair of reads to be converted into BAM file - pattern: "*.{fastq.gz}" - - - read_structure: - type: string - description: | - A read structure should always be provided for each of the fastq files. - If single end, the string will contain only one structure (i.e. "2M11S+T"), if paired-end the string - will contain two structures separated by a blank space (i.e. "2M11S+T 2M11S+T"). - If the read does not contain any UMI, the structure will be +T (i.e. only template of any length). - https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.yml}" - - umibam: - type: file - description: Converted, unsorted BAM file with RX tag reporting UMI sequence (if any) - pattern: "*.{bam}" - -authors: - - "@lescai" diff --git a/modules/nf-core/modules/fgbio/groupreadsbyumi/main.nf b/modules/nf-core/modules/fgbio/groupreadsbyumi/main.nf deleted file mode 100644 index 47f000a599..0000000000 --- a/modules/nf-core/modules/fgbio/groupreadsbyumi/main.nf +++ /dev/null @@ -1,40 +0,0 @@ -process FGBIO_GROUPREADSBYUMI { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::fgbio=1.4.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fgbio:1.4.0--hdfd78af_0' : - 'quay.io/biocontainers/fgbio:1.4.0--hdfd78af_0' }" - - input: - tuple val(meta), path(taggedbam) - val(strategy) - - output: - tuple val(meta), path("*_umi-grouped.bam") , emit: bam - tuple val(meta), path("*_umi_histogram.txt"), emit: histogram - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - """ - mkdir tmp - - fgbio \\ - --tmp-dir=${PWD}/tmp \\ - GroupReadsByUmi \\ - -s $strategy \\ - $args \\ - -i $taggedbam \\ - -o ${prefix}_umi-grouped.bam \\ - -f ${prefix}_umi_histogram.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/fgbio/groupreadsbyumi/meta.yml b/modules/nf-core/modules/fgbio/groupreadsbyumi/meta.yml deleted file mode 100644 index 18ce149e52..0000000000 --- a/modules/nf-core/modules/fgbio/groupreadsbyumi/meta.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: fgbio_groupreadsbyumi -description: | - Groups reads together that appear to have come from the same original molecule. - Reads are grouped by template, and then templates are sorted by the 5’ mapping positions - of the reads from the template, used from earliest mapping position to latest. - Reads that have the same end positions are then sub-grouped by UMI sequence. - (!) Note: the MQ tag is required on reads with mapped mates (!) - This can be added using samblaster with the optional argument --addMateTags. 
-keywords: - - UMI - - groupreads - - fgbio -tools: - - fgbio: - description: A set of tools for working with genomic and high throughput sequencing data, including UMIs - homepage: http://fulcrumgenomics.github.io/fgbio/ - documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/ - tool_dev_url: https://github.com/fulcrumgenomics/fgbio - doi: "" - licence: ['MIT'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: | - BAM file. Note: the MQ tag is required on reads with mapped mates (!) - pattern: "*.bam" - - strategy: - type: value - description: | - Reguired argument: defines the UMI assignment strategy. - Must be chosen among: Identity, Edit, Adjacency, Paired. - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - bam: - type: file - description: UMI-grouped BAM - pattern: "*.bam" - - histogram: - type: file - description: A text file containing the tag family size counts - pattern: "*.txt" - -authors: - - "@lescai" diff --git a/modules/nf-core/modules/freebayes/main.nf b/modules/nf-core/modules/freebayes/main.nf deleted file mode 100644 index f9ab59fc29..0000000000 --- a/modules/nf-core/modules/freebayes/main.nf +++ /dev/null @@ -1,70 +0,0 @@ -process FREEBAYES { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::freebayes=1.3.5" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/freebayes:1.3.5--py38ha193a2f_3' : - 'quay.io/biocontainers/freebayes:1.3.5--py38ha193a2f_3' }" - - input: - tuple val(meta), path(input_1), path(input_1_index), path(input_2), path(input_2_index), path (target_bed) - path fasta - path fasta_fai - path samples - path populations - path cnv - - output: - tuple val(meta), path("*.vcf.gz"), emit: vcf - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def input = input_2 ? "${input_1} ${input_2}" : "${input_1}" - def targets_file = target_bed ? "--target ${target_bed}" : "" - def samples_file = samples ? "--samples ${samples}" : "" - def populations_file = populations ? "--populations ${populations}" : "" - def cnv_file = cnv ? "--cnv-map ${cnv}" : "" - - if (task.cpus > 1) { - """ - freebayes-parallel \\ - <(fasta_generate_regions.py $fasta_fai 10000) $task.cpus \\ - -f $fasta \\ - $targets_file \\ - $samples_file \\ - $populations_file \\ - $cnv_file \\ - $args \\ - $input > ${prefix}.vcf - - bgzip ${prefix}.vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - freebayes: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' ) - END_VERSIONS - """ - - } else { - """ - freebayes \\ - -f $fasta \\ - $targets_file \\ - $samples_file \\ - $populations_file \\ - $cnv_file \\ - $args \\ - $input > ${prefix}.vcf - - gzip --no-name ${prefix}.vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - freebayes: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' ) - END_VERSIONS - """ - } -} diff --git a/modules/nf-core/modules/freebayes/meta.yml b/modules/nf-core/modules/freebayes/meta.yml deleted file mode 100644 index abba1daa37..0000000000 --- a/modules/nf-core/modules/freebayes/meta.yml +++ /dev/null @@ -1,82 +0,0 @@ -name: freebayes -description: A haplotype-based variant detector -keywords: - - variant caller - - SNP - - genotyping - - somatic variant 
calling - - germline variant calling - - bacterial variant calling - - bayesian - -tools: - - freebayes: - description: Bayesian haplotype-based polymorphism discovery and genotyping - homepage: https://github.com/freebayes/freebayes - documentation: https://github.com/freebayes/freebayes - tool_dev_url: https://github.com/freebayes/freebayes - doi: "arXiv:1207.3907" - licence: ['MIT'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - input_index: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.bam.bai" - - fasta: - type: file - description: reference fasta file - pattern: ".{fa,fa.gz,fasta,fasta.gz}" - - fasta_fai: - type: file - description: reference fasta file index - pattern: "*.{fa,fasta}.fai" - - targets: - type: file - description: Optional - Limit analysis to targets listed in this BED-format FILE. - pattern: "*.bed" - - samples: - type: file - description: Optional - Limit analysis to samples listed (one per line) in the FILE. - pattern: "*.txt" - - populations: - type: file - description: Optional - Each line of FILE should list a sample and a population which it is part of. - pattern: "*.txt" - - cnv: - type: file - description: | - A copy number map BED file, which has either a sample-level ploidy: - sample_name copy_number - or a region-specific format: - seq_name start end sample_name copy_number - pattern: "*.bed" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - vcf: - type: file - description: Compressed VCF file - pattern: "*.vcf.gz" - -authors: - - "@maxibor" - - "@FriederikeHanssen" - - "@maxulysse" diff --git a/modules/nf-core/modules/gatk4/applybqsr/main.nf b/modules/nf-core/modules/gatk4/applybqsr/main.nf deleted file mode 100644 index 672e93e045..0000000000 --- a/modules/nf-core/modules/gatk4/applybqsr/main.nf +++ /dev/null @@ -1,51 +0,0 @@ -process GATK4_APPLYBQSR { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - - input: - tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) - path fasta - path fai - path dict - - output: - tuple val(meta), path("*.bam"), emit: bam, optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def interval = intervals ? "-L ${intervals}" : "" - def file_type = input.getExtension() - - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" ApplyBQSR \\ - -R $fasta \\ - -I $input \\ - --bqsr-recal-file $bqsr_table \\ - $interval \\ - --tmp-dir . 
\\ - -O ${prefix}.${file_type} \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gatk4/applybqsr/meta.yml b/modules/nf-core/modules/gatk4/applybqsr/meta.yml deleted file mode 100644 index ad1f82a1e4..0000000000 --- a/modules/nf-core/modules/gatk4/applybqsr/meta.yml +++ /dev/null @@ -1,68 +0,0 @@ -name: gatk4_applybqsr -description: Apply base quality score recalibration (BQSR) to a bam file -keywords: - - bqsr - - bam -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['Apache-2.0'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM file from alignment - pattern: "*.{bam,cram}" - - input_index: - type: file - description: BAI/CRAI file from alignment - pattern: "*.{bai,crai}" - - bqsr_table: - type: file - description: Recalibration table from gatk4_baserecalibrator - - intervals: - type: file - description: Bed file with the genomic regions included in the library (optional) - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - fai: - type: file - description: Index of reference fasta file - pattern: "*.fasta.fai" - - dict: - type: file - description: GATK sequence dictionary - pattern: "*.dict" - - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - bam: - type: file - description: Recalibrated BAM file - pattern: "*.{bam}" - -authors: - - "@yocra3" - - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/gatk4/applyvqsr/main.nf b/modules/nf-core/modules/gatk4/applyvqsr/main.nf deleted file mode 100644 index 9923397520..0000000000 --- a/modules/nf-core/modules/gatk4/applyvqsr/main.nf +++ /dev/null @@ -1,55 +0,0 @@ -process GATK4_APPLYVQSR { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - - input: - tuple val(meta), path(vcf), path(tbi), path(recal), path(recalidx), path(tranches) - path fasta - path fai - path dict - val allelespecific - val truthsensitivity - val mode - - output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - tuple val(meta), path("*.tbi") , emit: tbi - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - refCommand = fasta ? "-R ${fasta} " : '' - alleleSpecificCommand = allelespecific ? '-AS' : '' - truthSensitivityCommand = truthsensitivity ? "--truth-sensitivity-filter-level ${truthsensitivity}" : '' - modeCommand = mode ? "--mode ${mode} " : 'SNP' - - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK ApplyVQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
- } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" ApplyVQSR \\ - ${refCommand} \\ - -V ${vcf} \\ - -O ${prefix}.vcf.gz \\ - ${alleleSpecificCommand} \\ - ${truthSensitivityCommand} \\ - --tranches-file $tranches \\ - --recal-file $recal \\ - ${modeCommand} \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gatk4/applyvqsr/meta.yml b/modules/nf-core/modules/gatk4/applyvqsr/meta.yml deleted file mode 100644 index b757f3e971..0000000000 --- a/modules/nf-core/modules/gatk4/applyvqsr/meta.yml +++ /dev/null @@ -1,88 +0,0 @@ -name: gatk4_applyvqsr -description: | - Apply a score cutoff to filter variants based on a recalibration table. - AplyVQSR performs the second pass in a two-stage process called Variant Quality Score Recalibration (VQSR). - Specifically, it applies filtering to the input variants based on the recalibration table produced - in the first step by VariantRecalibrator and a target sensitivity value. -keywords: - - gatk4 - - applyvqsr - - VQSR -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['Apache-2.0'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test'] - - vcf: - type: file - description: VCF file to be recalibrated, this should be the same file as used for the first stage VariantRecalibrator. 
- pattern: "*.vcf" - - tbi: - type: file - description: Tbi index for the input vcf file. - pattern: "*.vcf.tbi" - - recal: - type: file - description: Recalibration file produced when the input vcf was run through VariantRecalibrator in stage 1. - pattern: "*.recal" - - recalidx: - type: file - description: Index file for the recalibration file. - pattern: ".recal.idx" - - tranches: - type: boolean - description: Tranches file produced when the input vcf was run through VariantRecalibrator in stage 1. - pattern: ".tranches" - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - fai: - type: file - description: Index of reference fasta file - pattern: "*.fasta.fai" - - dict: - type: file - description: GATK sequence dictionary - pattern: "*.dict" - - allelespecific: - type: boolean - description: Whether or not to run ApplyVQSR in allele specific mode, this should be kept the same as the stage 1 VariantRecalibrator run. - pattern: "{true,false}" - - truthsensitivity: - type: double - description: Value to be used as the truth sensitivity cutoff score. - pattern: "99.0" - - mode: - type: String - description: Specifies which recalibration mode to employ, should be the same as the stage 1 VariantRecalibrator run. (SNP is default, BOTH is intended for testing only) - pattern: "{SNP,INDEL,BOTH}" - -output: - - vcf: - type: file - description: compressed vcf file containing the recalibrated variants. - pattern: "*.vcf.gz" - - tbi: - type: file - description: Index of recalibrated vcf file. - pattern: "*vcf.gz.tbi" - - versions: - type: file - description: File containing software versions. 
- pattern: "versions.yml" - -authors: - - "@GCJMackenzie" diff --git a/modules/nf-core/modules/gatk4/baserecalibrator/main.nf b/modules/nf-core/modules/gatk4/baserecalibrator/main.nf deleted file mode 100644 index 48c127f0ee..0000000000 --- a/modules/nf-core/modules/gatk4/baserecalibrator/main.nf +++ /dev/null @@ -1,53 +0,0 @@ -process GATK4_BASERECALIBRATOR { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - - input: - tuple val(meta), path(input), path(input_index), path(intervals) - path fasta - path fai - path dict - path knownSites - path knownSites_tbi - - output: - tuple val(meta), path("*.table"), emit: table - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def intervalsCommand = intervals ? "-L ${intervals}" : "" - def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ') - - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - - """ - gatk --java-options "-Xmx${avail_mem}g" BaseRecalibrator \ - -R $fasta \ - -I $input \ - $sitesCommand \ - $intervalsCommand \ - --tmp-dir . 
\ - $args \ - -O ${prefix}.table - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gatk4/baserecalibrator/meta.yml b/modules/nf-core/modules/gatk4/baserecalibrator/meta.yml deleted file mode 100644 index 641a50df02..0000000000 --- a/modules/nf-core/modules/gatk4/baserecalibrator/meta.yml +++ /dev/null @@ -1,67 +0,0 @@ -name: gatk4_baserecalibrator -description: Generate recalibration table for Base Quality Score Recalibration (BQSR) -keywords: - - sort -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['Apache-2.0'] - - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM file from alignment - pattern: "*.{bam,cram}" - - input_index: - type: file - description: BAI/CRAI file from alignment - pattern: "*.{bai,crai}" - - intervals: - type: file - description: Bed file with the genomic regions included in the library (optional) - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - fai: - type: file - description: Index of reference fasta file - pattern: "*.fasta.fai" - - dict: - type: file - description: GATK sequence dictionary - pattern: "*.dict" - - knownSites: - type: file - description: Bed file with the genomic regions included in the library (optional) - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - table: - type: file - description: Recalibration table from BaseRecalibrator - pattern: "*.{table}" - -authors: - - "@yocra3" - - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/gatk4/calculatecontamination/meta.yml b/modules/nf-core/modules/gatk4/calculatecontamination/meta.yml deleted file mode 100644 index 8c84373209..0000000000 --- a/modules/nf-core/modules/gatk4/calculatecontamination/meta.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: gatk4_calculatecontamination -description: | - Calculates the fraction of reads from cross-sample contamination based on summary tables from getpileupsummaries. Output to be used with filtermutectcalls. -keywords: - - gatk4 - - calculatecontamination - - cross-samplecontamination - - getpileupsummaries - - filtermutectcalls -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. 
Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['Apache-2.0'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - pileup: - type: file - description: File containing the pileups summary table of a tumor sample to be used to calculate contamination. - pattern: "*.pileups.table" - - matched: - type: file - description: File containing the pileups summary table of a normal sample that matches with the tumor sample specified in pileup argument. This is an optional input. - pattern: "*.pileups.table" - - segmentout: - type: boolean - description: specifies whether to output the segmentation table. - -output: - - contamination: - type: file - description: File containing the contamination table. - pattern: "*.contamination.table" - - segmentation: - type: file - description: optional output table containing segmentation of tumor minor allele fractions. - pattern: "*.segmentation.table" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@GCJMackenzie" diff --git a/modules/nf-core/modules/gatk4/createsequencedictionary/main.nf b/modules/nf-core/modules/gatk4/createsequencedictionary/main.nf deleted file mode 100644 index 714843c273..0000000000 --- a/modules/nf-core/modules/gatk4/createsequencedictionary/main.nf +++ /dev/null @@ -1,40 +0,0 @@ -process GATK4_CREATESEQUENCEDICTIONARY { - tag "$fasta" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - - input: - path fasta - - output: - path "*.dict" , emit: dict - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def avail_mem = 6 - if (!task.memory) { - log.info '[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" \\ - CreateSequenceDictionary \\ - --REFERENCE $fasta \\ - --URI $fasta \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/modules/gatk4/createsequencedictionary/meta.yml deleted file mode 100644 index 54f479b393..0000000000 --- a/modules/nf-core/modules/gatk4/createsequencedictionary/meta.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: gatk4_createsequencedictionary -description: Creates a sequence dictionary for a reference sequence -keywords: - - dictionary - - fasta -tools: - - gatk: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. 
- homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['Apache-2.0'] - -input: - - fasta: - type: file - description: Input fasta file - pattern: "*.{fasta,fa}" -output: - - dict: - type: file - description: gatk dictionary file - pattern: "*.{dict}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@maxulysse" diff --git a/modules/nf-core/modules/gatk4/estimatelibrarycomplexity/main.nf b/modules/nf-core/modules/gatk4/estimatelibrarycomplexity/main.nf deleted file mode 100644 index 2894e305bc..0000000000 --- a/modules/nf-core/modules/gatk4/estimatelibrarycomplexity/main.nf +++ /dev/null @@ -1,47 +0,0 @@ -process GATK4_ESTIMATELIBRARYCOMPLEXITY { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - - input: - tuple val(meta), path(cram) - path(fasta) - path(fai) - path(dict) - - output: - tuple val(meta), path('*.metrics'), emit: metrics - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def crams = cram.collect(){ x -> "-I ".concat(x.toString()) }.join(" ") - - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK EstimateLibraryComplexity] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
- } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" EstimateLibraryComplexity \ - ${crams} \ - -O ${prefix}.metrics \ - --REFERENCE_SEQUENCE ${fasta} \ - --VALIDATION_STRINGENCY SILENT \ - --TMP_DIR . $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gatk4/estimatelibrarycomplexity/meta.yml b/modules/nf-core/modules/gatk4/estimatelibrarycomplexity/meta.yml deleted file mode 100644 index 94c1817d9d..0000000000 --- a/modules/nf-core/modules/gatk4/estimatelibrarycomplexity/meta.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: gatk4_estimatelibrarycomplexity -description: Estimates the numbers of unique molecules in a sequencing library. -keywords: - - gatk4 - - gatk4_estimatelibrarycomplexity - - duplication_metrics - - reporting -tools: - - gatk4: - description: Genome Analysis Toolkit (GATK4) - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us - tool_dev_url: https://github.com/broadinstitute/gatk - doi: "10.1158/1538-7445.AM2017-3590" - licence: ['Apache-2.0'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - cram: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - fai: - type: file - description: Index of reference fasta file - pattern: "fasta.fai" - - dict: - type: file - description: GATK sequence dictionary - pattern: "*.dict" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - metrics: - type: file - description: File containing metrics on the input files - pattern: "*.{metrics}" - -authors: - - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/gatk4/filtermutectcalls/main.nf b/modules/nf-core/modules/gatk4/filtermutectcalls/main.nf deleted file mode 100644 index a7dd9a61a9..0000000000 --- a/modules/nf-core/modules/gatk4/filtermutectcalls/main.nf +++ /dev/null @@ -1,64 +0,0 @@ -process GATK4_FILTERMUTECTCALLS { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - - input: - tuple val(meta), path(vcf), path(tbi), path(stats), path(orientationbias), path(segmentation), path(contaminationfile), val(contaminationest) - path fasta - path fai - path dict - - output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - tuple val(meta), path("*.vcf.gz.tbi") , emit: tbi - tuple val(meta), path("*.filteringStats.tsv"), emit: stats - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - def orientationbias_options = '' - if (orientationbias) { - orientationbias_options = '--orientation-bias-artifact-priors ' + orientationbias.join(' --orientation-bias-artifact-priors ') - } - - def segmentation_options = '' - if (segmentation) { - segmentation_options = '--tumor-segmentation ' + segmentation.join(' --tumor-segmentation ') - } - - def contamination_options = contaminationest ? 
" --contamination-estimate ${contaminationest} " : '' - if (contaminationfile) { - contamination_options = '--contamination-table ' + contaminationfile.join(' --contamination-table ') - } - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK FilterMutectCalls] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" FilterMutectCalls \\ - -R $fasta \\ - -V $vcf \\ - $orientationbias_options \\ - $segmentation_options \\ - $contamination_options \\ - -O ${prefix}.vcf.gz \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gatk4/filtermutectcalls/meta.yml b/modules/nf-core/modules/gatk4/filtermutectcalls/meta.yml deleted file mode 100644 index 7d85e2b915..0000000000 --- a/modules/nf-core/modules/gatk4/filtermutectcalls/meta.yml +++ /dev/null @@ -1,84 +0,0 @@ -name: gatk4_filtermutectcalls -description: | - Filters the raw output of mutect2, can optionally use outputs of calculatecontamination and learnreadorientationmodel to improve filtering. -keywords: - - filtermutectcalls - - mutect2 - - gatk4 - - filtervcf -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test' ] - - vcf: - type: file - description: compressed vcf file of mutect2calls - pattern: "*.vcf.gz" - - tbi: - type: file - description: Index of vcf file - pattern: "*vcf.gz.tbi" - - stats: - type: file - description: Stats file that pairs with output vcf file - pattern: "*vcf.gz.stats" - - orientationbias: - type: list - description: files containing artifact priors for input vcf. Optional input. - pattern: "*.artifact-prior.tar.gz" - - segmentation: - type: list - description: tables containing segmentation information for input vcf. Optional input. - pattern: "*.segmentation.table" - - contaminationfile: - type: list - description: table(s) containing contamination contamination data for input vcf. Optional input, takes priority over contaminationest. - pattern: "*.contamination.table" - - contaminationest: - type: val - description: estimation of contamination value as a double. Optional input, will only be used if contaminationfile is not specified. - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - fai: - type: file - description: Index of reference fasta file - pattern: "*.fasta.fai" - - dict: - type: file - description: GATK sequence dictionary - pattern: "*.dict" - -output: - - vcf: - type: file - description: file containing filtered mutect2 calls. - pattern: "*.vcf.gz" - - tbi: - type: file - description: tbi file that pairs with vcf. - pattern: "*.vcf.gz.tbi" - - stats: - type: file - description: file containing statistics of the filtermutectcalls run. 
- pattern: "*.filteringStats.tsv" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@GCJMackenzie" diff --git a/modules/nf-core/modules/gatk4/gatherbqsrreports/meta.yml b/modules/nf-core/modules/gatk4/gatherbqsrreports/meta.yml deleted file mode 100644 index f71afd69d1..0000000000 --- a/modules/nf-core/modules/gatk4/gatherbqsrreports/meta.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: gatk4_gatherbqsrreports -description: write your description here -keywords: - - gatk4 - - gatk4_gatherbqsrreports - - base_recalibration -tools: - - gatk4: - description: Genome Analysis Toolkit (GATK4) - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us - tool_dev_url: https://github.com/broadinstitute/gatk - doi: "10.1158/1538-7445.AM2017-3590" - licence: ['BSD-3-clause'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - recal_table: - type: file - description: File(s) containing BQSR table(s) - pattern: "*.table" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - recal_table: - type: file - description: File containing joined BQSR table - pattern: "*.table" - -authors: - - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/gatk4/genomicsdbimport/main.nf b/modules/nf-core/modules/gatk4/genomicsdbimport/main.nf deleted file mode 100644 index e88471e186..0000000000 --- a/modules/nf-core/modules/gatk4/genomicsdbimport/main.nf +++ /dev/null @@ -1,66 +0,0 @@ -process GATK4_GENOMICSDBIMPORT { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? 
"bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - - input: - tuple val(meta), path(vcf), path(tbi), path(intervalfile), val(intervalval), path(wspace) - val run_intlist - val run_updatewspace - val input_map - - output: - tuple val(meta), path("${prefix}") , optional:true, emit: genomicsdb - tuple val(meta), path("$updated_db") , optional:true, emit: updatedb - tuple val(meta), path("*.interval_list"), optional:true, emit: intervallist - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - - // settings for running default create gendb mode - inputs_command = input_map ? "--sample-name-map ${vcf[0]}" : "${'-V ' + vcf.join(' -V ')}" - dir_command = "--genomicsdb-workspace-path ${prefix}" - intervals_command = intervalfile ? " -L ${intervalfile} " : " -L ${intervalval} " - - // settings changed for running get intervals list mode if run_intlist is true - if (run_intlist) { - inputs_command = '' - dir_command = "--genomicsdb-update-workspace-path ${wspace}" - intervals_command = "--output-interval-list-to-file ${prefix}.interval_list" - } - - // settings changed for running update gendb mode. inputs_command same as default, update_db forces module to emit the updated gendb - if (run_updatewspace) { - dir_command = "--genomicsdb-update-workspace-path ${wspace}" - intervals_command = '' - updated_db = wspace.toString() - } - - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK GenomicsDBImport] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
- } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" GenomicsDBImport \\ - $inputs_command \\ - $dir_command \\ - $intervals_command \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gatk4/genomicsdbimport/meta.yml b/modules/nf-core/modules/gatk4/genomicsdbimport/meta.yml deleted file mode 100644 index af626cb1f2..0000000000 --- a/modules/nf-core/modules/gatk4/genomicsdbimport/meta.yml +++ /dev/null @@ -1,84 +0,0 @@ -name: gatk4_genomicsdbimport -description: merge GVCFs from multiple samples. For use in joint genotyping or somatic panel of normal creation. -keywords: - - gatk4 - - genomicsdbimport - - genomicsdb - - panelofnormalscreation - - jointgenotyping -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test'] - - vcf: - type: list - description: either a list of vcf files to be used to create or update a genomicsdb, or a file that contains a map to vcf files to be used. - pattern: "*.vcf.gz" - - - tbi: - type: list - description: list of tbi files that match with the input vcf files - pattern: "*.vcf.gz_tbi" - - - wspace: - type: path - description: path to an existing genomicsdb to be used in update db mode or get intervals mode. This WILL NOT specify name of a new genomicsdb in create db mode. 
- pattern: "/path/to/existing/gendb" - - - intervalfile: - type: file - description: file containing the intervals to be used when creating the genomicsdb - pattern: "*.interval_list" - - - intervalval: - type: string - description: if an intervals file has not been spcified, the value enetered here will be used as an interval via the "-L" argument - pattern: "example: chr1:1000-10000" - - - run_intlist: - type: boolean - description: Specify whether to run get interval list mode, this option cannot be specified at the same time as run_updatewspace. - pattern: "true/false" - - - run_updatewspace: - type: boolean - description: Specify whether to run update genomicsdb mode, this option takes priority over run_intlist. - pattern: "true/false" - - - input_map: - type: boolean - description: Specify whether the vcf input is providing a list of vcf file(s) or a single file containing a map of paths to vcf files to be used to create or update a genomicsdb. - pattern: "*.sample_map" - -output: - - genomicsdb: - type: directory - description: Directory containing the files that compose the genomicsdb workspace, this is only output for create mode, as update changes an existing db - pattern: "*/$prefix" - - updatedb: - type: directory - description: Directory containing the files that compose the updated genomicsdb workspace, this is only output for update mode, and should be the same path as the input wspace. - pattern: "same/path/as/wspace" - - intervallist: - type: file - description: File containing the intervals used to generate the genomicsdb, only created by get intervals mode. 
- pattern: "*.interval_list" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@GCJMackenzie" diff --git a/modules/nf-core/modules/gatk4/genotypegvcfs/main.nf b/modules/nf-core/modules/gatk4/genotypegvcfs/main.nf deleted file mode 100644 index 016da1f5b0..0000000000 --- a/modules/nf-core/modules/gatk4/genotypegvcfs/main.nf +++ /dev/null @@ -1,50 +0,0 @@ -process GATK4_GENOTYPEGVCFS { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - - input: - tuple val(meta), path(gvcf), path(gvcf_index), path(intervals) - path fasta - path fasta_index - path fasta_dict - path dbsnp - path dbsnp_index - - output: - tuple val(meta), path("*.vcf.gz"), emit: vcf - tuple val(meta), path("*.tbi") , emit: tbi - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def dbsnp_options = dbsnp ? "-D ${dbsnp}" : "" - def interval_options = intervals ? "-L ${intervals}" : "" - def gvcf_options = gvcf.name.endsWith(".vcf") || gvcf.name.endsWith(".vcf.gz") ? "$gvcf" : "gendb://$gvcf" - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK GenotypeGVCFs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
- } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" \\ - GenotypeGVCFs \\ - $args \\ - $interval_options \\ - $dbsnp_options \\ - -R $fasta \\ - -V $gvcf_options \\ - -O ${prefix}.vcf.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gatk4/genotypegvcfs/meta.yml b/modules/nf-core/modules/gatk4/genotypegvcfs/meta.yml deleted file mode 100644 index 2c9767b2e5..0000000000 --- a/modules/nf-core/modules/gatk4/genotypegvcfs/meta.yml +++ /dev/null @@ -1,72 +0,0 @@ -name: gatk4_genotypegvcfs -description: | - Perform joint genotyping on one or more samples pre-called with HaplotypeCaller. -keywords: - - joint genotyping - - genotype - - gvcf -tools: - - gatk4: - description: Genome Analysis Toolkit (GATK4) - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - tool_dev_url: https://github.com/broadinstitute/gatk - doi: "10.1158/1538-7445.AM2017-3590" - licence: ['BSD-3-clause'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - gvcf: - type: tuple of files - description: | - Tuple of gVCF(.gz) file (first) and its index (second) or the path to a GenomicsDB (and empty) - pattern: ["*.{vcf,vcf.gz}", "*.{idx,tbi}"] - - intervals: - type: file - description: Bed file with the genomic regions included in the library (optional) - - fasta: - type: file - description: Reference fasta file - pattern: "*.fasta" - - fasta_index: - type: file - description: Reference fasta index file - pattern: "*.fai" - - fasta_dict: - type: file - description: Reference fasta sequence dict file - pattern: "*.dict" - - dbsnp: - type: file - description: dbSNP VCF file - pattern: "*.vcf.gz" - - dbsnp_index: - type: tuple of files - description: dbSNP VCF index file - pattern: "*.tbi" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - vcf: - type: file - description: Genotyped VCF file - pattern: "*.vcf.gz" - - tbi: - type: file - description: Tbi index for VCF file - pattern: "*.vcf.gz" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@santiagorevale" diff --git a/modules/nf-core/modules/gatk4/getpileupsummaries/main.nf b/modules/nf-core/modules/gatk4/getpileupsummaries/main.nf deleted file mode 100644 index 6d98874f49..0000000000 --- a/modules/nf-core/modules/gatk4/getpileupsummaries/main.nf +++ /dev/null @@ -1,51 +0,0 @@ -process GATK4_GETPILEUPSUMMARIES { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - - input: - tuple val(meta), path(input), path(index), path(intervals) - path fasta - path fai - path dict - path variants - path variants_tbi - - output: - tuple val(meta), path('*.pileups.table'), emit: table - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def sitesCommand = intervals ? " -L ${intervals} " : " -L ${variants} " - def reference = fasta ? " -R ${fasta}" :"" - - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK GetPileupSummaries] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" GetPileupSummaries \\ - -I $input \\ - -V $variants \\ - $sitesCommand \\ - ${reference} \\ - -O ${prefix}.pileups.table \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gatk4/getpileupsummaries/meta.yml b/modules/nf-core/modules/gatk4/getpileupsummaries/meta.yml deleted file mode 100644 index ccf6446d4c..0000000000 --- a/modules/nf-core/modules/gatk4/getpileupsummaries/meta.yml +++ /dev/null @@ -1,71 +0,0 @@ -name: gatk4_getpileupsummaries -description: | - Summarizes counts of reads that support reference, alternate and other alleles for given sites. Results can be used with CalculateContamination. Requires a common germline variant sites file, such as from gnomAD. 
-keywords: - - gatk4 - - getpileupsumaries - - readcountssummary - - germlinevariantsites -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['Apache-2.0'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - input: - type: file - description: BAM/CRAM file to be summarised. - pattern: "*.{bam,cram}" - - input_index: - type: file - description: BAM/CRAM file index. - pattern: "*.{bai,crai}" - - intervals: - type: file - description: File containing specified sites to be used for the summary. If this option is not specified, variants file is used instead automatically. - pattern: "*.interval_list" - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - fai: - type: file - description: Index of reference fasta file - pattern: "*.fasta.fai" - - dict: - type: file - description: GATK sequence dictionary - pattern: "*.dict" - - variants: - type: file - description: Population vcf of germline sequencing, containing allele fractions. Is also used as sites file if no separate sites file is specified. - pattern: "*.vcf.gz" - - variants_tbi: - type: file - description: Index file for the germline resource. - pattern: "*.vcf.gz.tbi" - - -output: - - pileup: - type: file - description: File containing the pileup summary table. 
- pattern: "*.pileups.table" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@GCJMackenzie" diff --git a/modules/nf-core/modules/gatk4/haplotypecaller/main.nf b/modules/nf-core/modules/gatk4/haplotypecaller/main.nf deleted file mode 100644 index 6f03ffd2cc..0000000000 --- a/modules/nf-core/modules/gatk4/haplotypecaller/main.nf +++ /dev/null @@ -1,54 +0,0 @@ -process GATK4_HAPLOTYPECALLER { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - - input: - tuple val(meta), path(input), path(input_index), path(intervals) - path fasta - path fai - path dict - path dbsnp - path dbsnp_tbi - - output: - tuple val(meta), path("*.vcf.gz"), emit: vcf - tuple val(meta), path("*.tbi") , emit: tbi - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def interval_option = intervals ? "-L ${intervals}" : "" - def dbsnp_option = dbsnp ? "-D ${dbsnp}" : "" - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - """ - gatk \\ - --java-options "-Xmx${avail_mem}g" \\ - HaplotypeCaller \\ - -R $fasta \\ - -I $input \\ - ${dbsnp_option} \\ - ${interval_option} \\ - -O ${prefix}.vcf.gz \\ - $args \\ - --tmp-dir . 
- - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gatk4/haplotypecaller/meta.yml b/modules/nf-core/modules/gatk4/haplotypecaller/meta.yml deleted file mode 100644 index 869bd1d2dd..0000000000 --- a/modules/nf-core/modules/gatk4/haplotypecaller/meta.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: gatk4_haplotypecaller -description: Call germline SNPs and indels via local re-assembly of haplotypes -keywords: - - gatk4 - - haplotypecaller - - haplotype -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['Apache-2.0'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM file from alignment - pattern: "*.{bam,cram}" - - input_index: - type: file - description: BAI/CRAI file from alignment - pattern: "*.{bai,crai}" - - intervals: - type: file - description: Bed file with the genomic regions included in the library (optional) - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - fai: - type: file - description: Index of reference fasta file - pattern: "fasta.fai" - - dict: - type: file - description: GATK sequence dictionary - pattern: "*.dict" - - dbsnp: - type: file - description: VCF file containing known sites (optional) - - dbsnp_tbi: - type: file - description: VCF index of dbsnp (optional) - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - vcf: - type: file - description: Compressed VCF file - pattern: "*.vcf.gz" - - tbi: - type: file - description: Index of VCF file - pattern: "*.vcf.gz.tbi" - -authors: - - "@suzannejin" - - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/gatk4/markduplicates/main.nf b/modules/nf-core/modules/gatk4/markduplicates/main.nf deleted file mode 100644 index 735b093ee9..0000000000 --- a/modules/nf-core/modules/gatk4/markduplicates/main.nf +++ /dev/null @@ -1,46 +0,0 @@ -process GATK4_MARKDUPLICATES { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - - input: - tuple val(meta), path(bams) - - output: - tuple val(meta), path("*.bam") , emit: bam - tuple val(meta), path("*.bai") , emit: bai - tuple val(meta), path("*.metrics"), emit: metrics - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def bam_list = bams.collect(){ bam -> "--INPUT ".concat(bam.toString()) }.join(" ") - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" MarkDuplicates \\ - $bam_list \\ - --METRICS_FILE ${prefix}.metrics \\ - --TMP_DIR . \\ - --CREATE_INDEX true \\ - --OUTPUT ${prefix}.bam \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gatk4/markduplicates/meta.yml b/modules/nf-core/modules/gatk4/markduplicates/meta.yml deleted file mode 100644 index 5777067a4f..0000000000 --- a/modules/nf-core/modules/gatk4/markduplicates/meta.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: gatk4_markduplicates -description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. -keywords: - - markduplicates - - bam - - sort -tools: - - gatk4: - description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. 
Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- - tool_dev_url: https://github.com/broadinstitute/gatk - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['MIT'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: Sorted BAM file - pattern: "*.{bam}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - bam: - type: file - description: Marked duplicates BAM file - pattern: "*.{bam}" - - metrics: - type: file - description: Duplicate metrics file generated by GATK - pattern: "*.{metrics.txt}" - -authors: - - "@ajodeh-juma" - - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/gatk4/mutect2/main.nf b/modules/nf-core/modules/gatk4/mutect2/main.nf deleted file mode 100644 index a7afe86d05..0000000000 --- a/modules/nf-core/modules/gatk4/mutect2/main.nf +++ /dev/null @@ -1,80 +0,0 @@ -process GATK4_MUTECT2 { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - - input: - tuple val(meta) , path(input) , path(input_index) , path(intervals), val(which_norm) - val run_single - val run_pon - val run_mito - path fasta - path fai - path dict - path germline_resource - path germline_resource_tbi - path panel_of_normals - path panel_of_normals_tbi - - output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - tuple val(meta), path("*.tbi") , emit: tbi - tuple val(meta), path("*.stats") , emit: stats - tuple val(meta), path("*.f1r2.tar.gz"), optional:true, emit: f1r2 - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def panels_command = '' - def normals_command = '' - - def inputs_command = '-I ' + input.join( ' -I ') - def interval = intervals ? "-L ${intervals}" : "" - - if(run_pon) { - panels_command = '' - normals_command = '' - - } else if(run_single) { - panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals" - normals_command = '' - - } else if(run_mito){ - panels_command = "-L ${intervals} --mitochondria-mode" - normals_command = '' - - } else { - panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals --f1r2-tar-gz ${prefix}.f1r2.tar.gz" - normals_command = '-normal ' + which_norm.join( ' -normal ') - } - - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
- } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" Mutect2 \\ - -R ${fasta} \\ - ${inputs_command} \\ - ${normals_command} \\ - ${panels_command} \\ - ${interval} \\ - -O ${prefix}.vcf.gz \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gatk4/mutect2/meta.yml b/modules/nf-core/modules/gatk4/mutect2/meta.yml deleted file mode 100644 index 94ce72ee62..0000000000 --- a/modules/nf-core/modules/gatk4/mutect2/meta.yml +++ /dev/null @@ -1,105 +0,0 @@ -name: gatk4_mutect2 -description: Call somatic SNVs and indels via local assembly of haplotypes. -keywords: - - gatk4 - - mutect2 - - haplotype - - somatic -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['Apache-2.0'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test'] - - input: - type: list - description: list of BAM files, also able to take CRAM as an input - pattern: "*.{bam/cram}" - - input_index: - type: list - description: list of BAM file indexes, also able to take CRAM indexes as an input - pattern: "*.{bam.bai/cram.crai}" - - intervals: - type: File/string - description: Specify region the tools is run on. 
- pattern: ".{bed,interval_list}/chrM" - - which_norm: - type: list - description: optional list of sample headers contained in the normal sample bam files (these are required for tumor_normal_pair mode) - pattern: "testN" - - run_single: - type: boolean - description: Specify whether or not to run in tumor_single mode instead of tumor_normal_pair mode (will be ignored if run_pon is also true) - pattern: "true/false" - - run_pon: - type: boolean - description: Specify whether or not to run in panel_of_normal mode instead of tumor_normal_pair mode - pattern: "true/false" - - run_mito: - type: boolean - description: Specify whether or not to run in mitochondria-mode instead of tumor_normal_pair mode - pattern: "true/false" - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - fai: - type: file - description: Index of reference fasta file - pattern: "*.fasta.fai" - - dict: - type: file - description: GATK sequence dictionary - pattern: "*.dict" - - germline_resource: - type: file - description: Population vcf of germline sequencing, containing allele fractions. - pattern: "*.vcf.gz" - - germline_resource_tbi: - type: file - description: Index file for the germline resource. - pattern: "*.vcf.gz.tbi" - - panel_of_normals: - type: file - description: vcf file to be used as a panel of normals. - pattern: "*.vcf.gz" - - panel_of_normals_tbi: - type: file - description: Index for the panel of normals. 
- pattern: "*.vcf.gz.tbi" - -output: - - vcf: - type: file - description: compressed vcf file - pattern: "*.vcf.gz" - - tbi: - type: file - description: Index of vcf file - pattern: "*vcf.gz.tbi" - - stats: - type: file - description: Stats file that pairs with output vcf file - pattern: "*vcf.gz.stats" - - f1r2: - type: file - description: file containing information to be passed to LearnReadOrientationModel (only outputted when tumor_normal_pair mode is run) - pattern: "*.f1r2.tar.gz" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@GCJMackenzie" diff --git a/modules/nf-core/modules/gatk4/variantrecalibrator/main.nf b/modules/nf-core/modules/gatk4/variantrecalibrator/main.nf deleted file mode 100644 index df8a959995..0000000000 --- a/modules/nf-core/modules/gatk4/variantrecalibrator/main.nf +++ /dev/null @@ -1,65 +0,0 @@ -process GATK4_VARIANTRECALIBRATOR { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.4.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.1--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.4.1--hdfd78af_0' }" - - input: - tuple val(meta), path(vcf) , path(tbi) - path fasta - path fai - path dict - val allelespecific - tuple path(resvcfs), path(restbis), val(reslabels) - val annotation - val mode - val create_rscript - - output: - tuple val(meta), path("*.recal") , emit: recal - tuple val(meta), path("*.idx") , emit: idx - tuple val(meta), path("*.tranches"), emit: tranches - tuple val(meta), path("*plots.R") , emit: plots, optional:true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - refCommand = fasta ? "-R ${fasta} " : '' - alleleSpecificCommand = allelespecific ? 
'-AS' : '' - resourceCommand = '--resource:' + reslabels.join( ' --resource:') - annotationCommand = '-an ' + annotation.join( ' -an ') - modeCommand = mode ? "--mode ${mode} " : 'SNP' - rscriptCommand = create_rscript ? "--rscript-file ${prefix}.plots.R" : '' - - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK VariantRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - """ - gatk --java-options "-Xmx${avail_mem}g" VariantRecalibrator \\ - ${refCommand} \\ - -V ${vcf} \\ - ${alleleSpecificCommand} \\ - ${resourceCommand} \\ - ${annotationCommand} \\ - ${modeCommand} \\ - -O ${prefix}.recal \\ - --tranches-file ${prefix}.tranches \\ - ${rscriptCommand}\\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gatk4/variantrecalibrator/meta.yml b/modules/nf-core/modules/gatk4/variantrecalibrator/meta.yml deleted file mode 100644 index 92416a5820..0000000000 --- a/modules/nf-core/modules/gatk4/variantrecalibrator/meta.yml +++ /dev/null @@ -1,98 +0,0 @@ -name: gatk4_variantrecalibrator -description: | - Build a recalibration model to score variant quality for filtering purposes. - It is highly recommended to follow GATK best practices when using this module, - the gaussian mixture model requires a large number of samples to be used for the - tool to produce optimal results. For example, 30 samples for exome data. 
For more details see - https://gatk.broadinstitute.org/hc/en-us/articles/4402736812443-Which-training-sets-arguments-should-I-use-for-running-VQSR- -keywords: - - VariantRecalibrator - - gatk4 - - recalibration_model -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - vcf: - type: file - description: input vcf file containing the variants to be recalibrated - pattern: "*.vcf.gz" - - tbi: - type: file - description: tbi file matching with -vcf - pattern: "*.vcf.gz.tbi" - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - fai: - type: file - description: Index of reference fasta file - pattern: "fasta.fai" - - dict: - type: file - description: GATK sequence dictionary - pattern: "*.dict" - - allelespecific: - type: boolean - description: specify whether to use allele specific annotations - pattern: "{true,false}" - - resvcfs: - type: list - description: resource files to be used as truth, training and known sites resources, this imports the files into the module, file names are specified again in the resource_labels to be called via the command. - pattern: '*/hapmap_3.3.hg38_chr21.vcf.gz' - - restbis: - type: list - description: tbis for the corresponding vcfs files to be used as truth, training and known resources. 
- pattern: '*/hapmap_3.3.hg38_chr21.vcf.gz.tbi' - - reslabels: - type: list - description: labels for the resource files to be used as truth, training and known sites resources, label should include an identifier,which kind of resource(s) it is, prior value and name of the file. - pattern: "hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38_chr21.vcf.gz" - - annotation: - type: list - description: specify which annotations should be used for calculations. - pattern: "['QD', 'MQ', 'FS', 'SOR']" - - mode: - type: string - description: specifies which recalibration mode to employ (SNP is default, BOTH is intended for testing only) - pattern: "{SNP,INDEL,BOTH}" - - rscript: - type: boolean - description: specify whether to generate rscript.plot output file - pattern: "{true,false}" -output: - - recal: - type: file - description: Output recal file used by ApplyVQSR - pattern: "*.recal" - - idx: - type: file - description: Index file for the recal output file - pattern: "*.idx" - - tranches: - type: file - description: Output tranches file used by ApplyVQSR - pattern: "*.tranches" - - plots: - type: file - description: Optional output rscript file to aid in visualization of the input data and learned model. - pattern: "*plots.R" - - version: - type: file - description: File containing software versions - pattern: "*.versions.yml" -authors: - - "@GCJMackenzie" diff --git a/modules/nf-core/modules/manta/germline/main.nf b/modules/nf-core/modules/manta/germline/main.nf deleted file mode 100644 index 69d2f541ec..0000000000 --- a/modules/nf-core/modules/manta/germline/main.nf +++ /dev/null @@ -1,56 +0,0 @@ -process MANTA_GERMLINE { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? "bioconda::manta=1.6.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/manta:1.6.0--h9ee0642_1' : - 'quay.io/biocontainers/manta:1.6.0--h9ee0642_1' }" - - input: - tuple val(meta), path(input), path(input_index), path (target_bed), path(target_bed_tbi) - path fasta - path fai - - output: - tuple val(meta), path("*candidate_small_indels.vcf.gz") , emit: candidate_small_indels_vcf - tuple val(meta), path("*candidate_small_indels.vcf.gz.tbi"), emit: candidate_small_indels_vcf_tbi - tuple val(meta), path("*candidate_sv.vcf.gz") , emit: candidate_sv_vcf - tuple val(meta), path("*candidate_sv.vcf.gz.tbi") , emit: candidate_sv_vcf_tbi - tuple val(meta), path("*diploid_sv.vcf.gz") , emit: diploid_sv_vcf - tuple val(meta), path("*diploid_sv.vcf.gz.tbi") , emit: diploid_sv_vcf_tbi - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def options_manta = target_bed ? "--callRegions $target_bed" : "" - """ - configManta.py \ - --bam $input \ - --reference $fasta \ - $options_manta \ - $args \ - --runDir manta - - python manta/runWorkflow.py -m local -j $task.cpus - - mv manta/results/variants/candidateSmallIndels.vcf.gz \ - ${prefix}.candidate_small_indels.vcf.gz - mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \ - ${prefix}.candidate_small_indels.vcf.gz.tbi - mv manta/results/variants/candidateSV.vcf.gz \ - ${prefix}.candidate_sv.vcf.gz - mv manta/results/variants/candidateSV.vcf.gz.tbi \ - ${prefix}.candidate_sv.vcf.gz.tbi - mv manta/results/variants/diploidSV.vcf.gz \ - ${prefix}.diploid_sv.vcf.gz - mv manta/results/variants/diploidSV.vcf.gz.tbi \ - ${prefix}.diploid_sv.vcf.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - manta: \$( configManta.py --version ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/manta/germline/meta.yml b/modules/nf-core/modules/manta/germline/meta.yml deleted file mode 100644 index 3bdb82641b..0000000000 --- 
a/modules/nf-core/modules/manta/germline/meta.yml +++ /dev/null @@ -1,87 +0,0 @@ -name: manta_germline -description: Manta calls structural variants (SVs) and indels from mapped paired-end sequencing reads. It is optimized for analysis of germline variation in small sets of individuals and somatic variation in tumor/normal sample pairs. -keywords: - - somatic - - wgs - - wxs - - panel - - vcf - - structural variants - - small indels -tools: - - manta: - description: Structural variant and indel caller for mapped sequencing data - homepage: https://github.com/Illumina/manta - documentation: https://github.com/Illumina/manta/blob/v1.6.0/docs/userGuide/README.md - tool_dev_url: https://github.com/Illumina/manta - doi: "10.1093/bioinformatics/btv710" - licence: ['GPL v3'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - input_index: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - fasta: - type: file - description: Genome reference FASTA file - pattern: "*.{fa,fasta}" - - fai: - type: file - description: Genome reference FASTA index file - pattern: "*.{fa.fai,fasta.fai}" - - target_bed: - type: file - description: BED file containing target regions for variant calling - pattern: "*.{bed}" - - target_bed_tbi: - type: file - description: Index for BED file containing target regions for variant calling - pattern: "*.{bed.tbi}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - candidate_small_indels_vcf: - type: file - description: Gzipped VCF file containing variants - pattern: "*.{vcf.gz}" - - candidate_small_indels_vcf_tbi: - type: file - description: Index for gzipped VCF file containing variants - pattern: "*.{vcf.gz.tbi}" - - candidate_sv_vcf: - type: file - description: Gzipped VCF file containing variants - pattern: "*.{vcf.gz}" - - candidate_sv_vcf_tbi: - type: file - description: Index for gzipped VCF file containing variants - pattern: "*.{vcf.gz.tbi}" - - diploid_sv_vcf: - type: file - description: Gzipped VCF file containing variants - pattern: "*.{vcf.gz}" - - diploid_sv_vcf_tbi: - type: file - description: Index for gzipped VCF file containing variants - pattern: "*.{vcf.gz.tbi}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@maxulysse" diff --git a/modules/nf-core/modules/manta/somatic/main.nf b/modules/nf-core/modules/manta/somatic/main.nf deleted file mode 100644 index 18a5f0214e..0000000000 --- a/modules/nf-core/modules/manta/somatic/main.nf +++ /dev/null @@ -1,55 +0,0 @@ -process MANTA_SOMATIC { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? "bioconda::manta=1.6.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/manta:1.6.0--h9ee0642_1' : - 'quay.io/biocontainers/manta:1.6.0--h9ee0642_1' }" - - input: - tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor), path (interval), path(interval_index) - path fasta - path fai - - output: - tuple val(meta), path("*.candidate_small_indels.vcf.gz") , emit: candidate_small_indels_vcf - tuple val(meta), path("*.candidate_small_indels.vcf.gz.tbi") , emit: candidate_small_indels_vcf_tbi - tuple val(meta), path("*.candidate_sv.vcf.gz") , emit: candidate_sv_vcf - tuple val(meta), path("*.candidate_sv.vcf.gz.tbi") , emit: candidate_sv_vcf_tbi - tuple val(meta), path("*.diploid_sv.vcf.gz") , emit: diploid_sv_vcf - tuple val(meta), path("*.diploid_sv.vcf.gz.tbi") , emit: diploid_sv_vcf_tbi - tuple val(meta), path("*.somatic_sv.vcf.gz") , emit: somatic_sv_vcf - tuple val(meta), path("*.somatic_sv.vcf.gz.tbi") , emit: somatic_sv_vcf_tbi - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def options_manta = interval ? 
"--callRegions $interval" : "" - - """ - configManta.py \ - --tumorBam $input_tumor \ - --normalBam $input_normal \ - --reference $fasta \ - $options_manta \ - --runDir manta - - python manta/runWorkflow.py -m local -j $task.cpus - - mv manta/results/variants/candidateSmallIndels.vcf.gz ${prefix}.candidate_small_indels.vcf.gz - mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi ${prefix}.candidate_small_indels.vcf.gz.tbi - mv manta/results/variants/candidateSV.vcf.gz ${prefix}.candidate_sv.vcf.gz - mv manta/results/variants/candidateSV.vcf.gz.tbi ${prefix}.candidate_sv.vcf.gz.tbi - mv manta/results/variants/diploidSV.vcf.gz ${prefix}.diploid_sv.vcf.gz - mv manta/results/variants/diploidSV.vcf.gz.tbi ${prefix}.diploid_sv.vcf.gz.tbi - mv manta/results/variants/somaticSV.vcf.gz ${prefix}.somatic_sv.vcf.gz - mv manta/results/variants/somaticSV.vcf.gz.tbi ${prefix}.somatic_sv.vcf.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - manta: \$( configManta.py --version ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/manta/somatic/meta.yml b/modules/nf-core/modules/manta/somatic/meta.yml deleted file mode 100644 index ddd0eafe74..0000000000 --- a/modules/nf-core/modules/manta/somatic/meta.yml +++ /dev/null @@ -1,103 +0,0 @@ -name: manta_somatic -description: Manta calls structural variants (SVs) and indels from mapped paired-end sequencing reads. It is optimized for analysis of germline variation in small sets of individuals and somatic variation in tumor/normal sample pairs. 
-keywords: - - somatic - - wgs - - wxs - - panel - - vcf - - structural variants - - small indels -tools: - - manta: - description: Structural variant and indel caller for mapped sequencing data - homepage: https://github.com/Illumina/manta - documentation: https://github.com/Illumina/manta/blob/v1.6.0/docs/userGuide/README.md - tool_dev_url: https://github.com/Illumina/manta - doi: "10.1093/bioinformatics/btv710" - licence: ['GPL v3'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input_normal: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - input_index_normal: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - input_tumor: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - input_index_tumor: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - fasta: - type: file - description: Genome reference FASTA file - pattern: "*.{fa,fasta}" - - fai: - type: file - description: Genome reference FASTA index file - pattern: "*.{fa.fai,fasta.fai}" - - target_bed: - type: file - description: BED file containing target regions for variant calling - pattern: "*.{bed}" - - target_bed_tbi: - type: file - description: Index for BED file containing target regions for variant calling - pattern: "*.{bed.tbi}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - candidate_small_indels_vcf: - type: file - description: Gzipped VCF file containing variants - pattern: "*.{vcf.gz}" - - candidate_small_indels_vcf_tbi: - type: file - description: Index for gzipped VCF file containing variants - pattern: "*.{vcf.gz.tbi}" - - candidate_sv_vcf: - type: file - description: Gzipped VCF file containing variants - pattern: "*.{vcf.gz}" - - candidate_sv_vcf_tbi: - type: file - description: Index for gzipped VCF file containing variants - pattern: "*.{vcf.gz.tbi}" - - diploid_sv_vcf: - type: file - description: Gzipped VCF file containing variants - pattern: "*.{vcf.gz}" - - diploid_sv_vcf_tbi: - type: file - description: Index for gzipped VCF file containing variants - pattern: "*.{vcf.gz.tbi}" - - somatic_sv_vcf: - type: file - description: Gzipped VCF file containing variants - pattern: "*.{vcf.gz}" - - somatic_sv_vcf_tbi: - type: file - description: Index for gzipped VCF file containing variants - pattern: "*.{vcf.gz.tbi}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/manta/tumoronly/meta.yml b/modules/nf-core/modules/manta/tumoronly/meta.yml deleted file mode 100644 index 86d1c6c0c5..0000000000 --- a/modules/nf-core/modules/manta/tumoronly/meta.yml +++ /dev/null @@ -1,87 +0,0 @@ -name: manta_tumoronly -description: Manta calls structural variants (SVs) and indels from mapped paired-end sequencing reads. It is optimized for analysis of germline variation in small sets of individuals and somatic variation in tumor/normal sample pairs. 
-keywords: - - somatic - - wgs - - wxs - - panel - - vcf - - structural variants - - small indels -tools: - - manta: - description: Structural variant and indel caller for mapped sequencing data - homepage: https://github.com/Illumina/manta - documentation: https://github.com/Illumina/manta/blob/v1.6.0/docs/userGuide/README.md - tool_dev_url: https://github.com/Illumina/manta - doi: "10.1093/bioinformatics/btv710" - licence: ['GPL v3'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - input_index: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - fasta: - type: file - description: Genome reference FASTA file - pattern: "*.{fa,fasta}" - - fai: - type: file - description: Genome reference FASTA index file - pattern: "*.{fa.fai,fasta.fai}" - - target_bed: - type: file - description: BED file containing target regions for variant calling - pattern: "*.{bed}" - - target_bed_tbi: - type: file - description: Index for BED file containing target regions for variant calling - pattern: "*.{bed.tbi}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - candidate_small_indels_vcf: - type: file - description: Gzipped VCF file containing variants - pattern: "*.{vcf.gz}" - - candidate_small_indels_vcf_tbi: - type: file - description: Index for gzipped VCF file containing variants - pattern: "*.{vcf.gz.tbi}" - - candidate_sv_vcf: - type: file - description: Gzipped VCF file containing variants - pattern: "*.{vcf.gz}" - - candidate_sv_vcf_tbi: - type: file - description: Index for gzipped VCF file containing variants - pattern: "*.{vcf.gz.tbi}" - - tumor_sv_vcf: - type: file - description: Gzipped VCF file containing variants - pattern: "*.{vcf.gz}" - - tumor_sv_vcf_tbi: - type: file - description: Index for gzipped VCF file containing variants - pattern: "*.{vcf.gz.tbi}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@maxulysse" diff --git a/modules/nf-core/modules/msisensorpro/msi_somatic/meta.yml b/modules/nf-core/modules/msisensorpro/msi_somatic/meta.yml deleted file mode 100644 index 09bc0e738b..0000000000 --- a/modules/nf-core/modules/msisensorpro/msi_somatic/meta.yml +++ /dev/null @@ -1,80 +0,0 @@ -name: msisensorpro_msi_somatic -description: MSIsensor-pro evaluates Microsatellite Instability (MSI) for cancer patients with next generation sequencing data. It accepts the whole genome sequencing, whole exome sequencing and target region (panel) sequencing data as input -keywords: - - micro-satellite-scan - - msisensor-pro - - msi - - somatic -tools: - - msisensorpro: - description: Microsatellite Instability (MSI) detection using high-throughput sequencing data. 
- homepage: https://github.com/xjtu-omics/msisensor-pro - documentation: https://github.com/xjtu-omics/msisensor-pro/wiki - tool_dev_url: https://github.com/xjtu-omics/msisensor-pro - doi: "doi.org/10.1016/j.gpb.2020.02.001" - licence: ['Custom Licence'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - normal: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - normal_index: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - tumor: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - tumor_index: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - intervals: - type: file - description: bed file containing interval information, optional - pattern: "*.{bed}" - - fasta: - type: file - description: Reference genome - pattern: "*.{fasta}" - - msisensor_scan: - type: file - description: Output from msisensor-pro/scan, conaining list of msi regions - pattern: "*.list" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - output_report: - type: file - description: File containing final report with all detected microsatellites, unstable somatic microsatellites, msi score - - output_dis: - type: file - description: File containing distribution results - - output_germline: - type: file - description: File containing germline results - - output_somatic: - type: file - description: File containing somatic results - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - list: - type: file - description: File containing microsatellite list - pattern: "*.{list}" - -authors: - - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/msisensorpro/scan/meta.yml b/modules/nf-core/modules/msisensorpro/scan/meta.yml deleted file mode 100644 index 72c1b84b7b..0000000000 --- a/modules/nf-core/modules/msisensorpro/scan/meta.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: msisensorpro_scan -description: MSIsensor-pro evaluates Microsatellite Instability (MSI) for cancer patients with next generation sequencing data. It accepts the whole genome sequencing, whole exome sequencing and target region (panel) sequencing data as input -keywords: - - micro-satellite-scan - - msisensor-pro - - scan -tools: - - msisensorpro: - description: Microsatellite Instability (MSI) detection using high-throughput sequencing data. - homepage: https://github.com/xjtu-omics/msisensor-pro - documentation: https://github.com/xjtu-omics/msisensor-pro/wiki - tool_dev_url: https://github.com/xjtu-omics/msisensor-pro - doi: "doi.org/10.1016/j.gpb.2020.02.001" - licence: ['Custom Licence'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Reference genome - pattern: "*.{fasta}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - list: - type: file - description: File containing microsatellite list - pattern: "*.{list}" - -authors: - - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf deleted file mode 100644 index 0ff7cac11f..0000000000 --- a/modules/nf-core/modules/multiqc/main.nf +++ /dev/null @@ -1,31 +0,0 @@ -process MULTIQC { - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::multiqc=1.11' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" - - input: - path multiqc_files - - output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - multiqc -f $args . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/modules/multiqc/meta.yml deleted file mode 100644 index 63c75a450a..0000000000 --- a/modules/nf-core/modules/multiqc/meta.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: MultiQC -description: Aggregate results from bioinformatics analyses across many samples into a single report -keywords: - - QC - - bioinformatics tools - - Beautiful stand-alone HTML report -tools: - - multiqc: - description: | - MultiQC searches a given directory for analysis logs and compiles a HTML report. - It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. 
- homepage: https://multiqc.info/ - documentation: https://multiqc.info/docs/ - licence: ['GPL-3.0-or-later'] -input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC -output: - - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" - - data: - type: dir - description: MultiQC data dir - pattern: "multiqc_data" - - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@abhi18av" - - "@bunop" - - "@drpatelh" diff --git a/modules/nf-core/modules/samblaster/main.nf b/modules/nf-core/modules/samblaster/main.nf deleted file mode 100644 index c881389acd..0000000000 --- a/modules/nf-core/modules/samblaster/main.nf +++ /dev/null @@ -1,37 +0,0 @@ -process SAMBLASTER { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::samblaster=0.1.26 bioconda::samtools=1.14" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:ba4a02b56f3e524a6e006bcd99fe8cc1d7fe09eb-0' : - 'quay.io/biocontainers/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:ba4a02b56f3e524a6e006bcd99fe8cc1d7fe09eb-0' }" - - input: - tuple val(meta), path(bam) - - output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def args3 = task.ext.args3 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - if( "$bam" == "${prefix}.bam" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
- """ - samtools view -h $args2 $bam | \\ - samblaster $args | \\ - samtools view $args3 -Sb - >${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samblaster: \$( samblaster -h 2>&1 | head -n 1 | sed 's/^samblaster: Version //' ) - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/samblaster/meta.yml b/modules/nf-core/modules/samblaster/meta.yml deleted file mode 100644 index 4d51f4fe8c..0000000000 --- a/modules/nf-core/modules/samblaster/meta.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: samblaster -description: | - This module combines samtools and samblaster in order to use - samblaster capability to filter or tag SAM files, with the advantage - of maintaining both input and output in BAM format. - Samblaster input must contain a sequence header: for this reason it has been piped - with the "samtools view -h" command. - Additional desired arguments for samtools can be passed using: - options.args2 for the input bam file - options.args3 for the output bam file -keywords: - - sort -tools: - - samblaster: - description: | - samblaster is a fast and flexible program for marking duplicates in read-id grouped paired-end SAM files. - It can also optionally output discordant read pairs and/or split read mappings to separate SAM files, - and/or unmapped/clipped reads to a separate FASTQ file. - By default, samblaster reads SAM input from stdin and writes SAM to stdout. - homepage: None - documentation: https://github.com/GregoryFaust/samblaster - tool_dev_url: https://github.com/GregoryFaust/samblaster - doi: "10.1093/bioinformatics/btu314" - licence: ['MIT'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM file - pattern: "*.bam" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - bam: - type: file - description: Tagged or filtered BAM file - pattern: "*.bam" - -authors: - - "@lescai" diff --git a/modules/nf-core/modules/samtools/bam2fq/meta.yml b/modules/nf-core/modules/samtools/bam2fq/meta.yml deleted file mode 100644 index f35701c457..0000000000 --- a/modules/nf-core/modules/samtools/bam2fq/meta.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: samtools_bam2fq -description: | - The module uses bam2fq method from samtools to - convert a SAM, BAM or CRAM file to FASTQ format -keywords: - - bam2fq - - samtools - - fastq -tools: - - samtools: - description: Tools for dealing with SAM, BAM and CRAM files - homepage: None - documentation: http://www.htslib.org/doc/1.1/samtools.html - tool_dev_url: None - doi: "" - licence: ['MIT'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - inputbam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - split: - type: boolean - description: | - TRUE/FALSE value to indicate if reads should be separated into - /1, /2 and if present other, or singleton. - Note: choosing TRUE will generate 4 different files. - Choosing FALSE will produce a single file, which will be interleaved in case - the input contains paired reads. - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - reads: - type: file - description: | - FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton) - or a single interleaved .fq.gz file if the user chooses not to split the reads. 
- pattern: "*.fq.gz" - -authors: - - "@lescai" diff --git a/modules/nf-core/modules/samtools/faidx/main.nf b/modules/nf-core/modules/samtools/faidx/main.nf deleted file mode 100644 index b83a4952d8..0000000000 --- a/modules/nf-core/modules/samtools/faidx/main.nf +++ /dev/null @@ -1,32 +0,0 @@ -process SAMTOOLS_FAIDX { - tag "$fasta" - label 'process_low' - - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path ("*.fai"), emit: fai - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - samtools \\ - faidx \\ - $fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/samtools/faidx/meta.yml b/modules/nf-core/modules/samtools/faidx/meta.yml deleted file mode 100644 index bae97a39f5..0000000000 --- a/modules/nf-core/modules/samtools/faidx/meta.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: samtools_faidx -description: Index FASTA file -keywords: - - index - - fasta -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: http://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ['MIT'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - fasta: - type: file - description: FASTA file - pattern: "*.{fa,fasta}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fai: - type: file - description: FASTA index file - pattern: "*.{fai}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@ewels" - - "@phue" diff --git a/modules/nf-core/modules/samtools/index/meta.yml b/modules/nf-core/modules/samtools/index/meta.yml deleted file mode 100644 index 0905b3cd69..0000000000 --- a/modules/nf-core/modules/samtools/index/meta.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: samtools_index -description: Index SAM/BAM/CRAM file -keywords: - - index - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ['MIT'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - bai: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - crai: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - csi: - type: file - description: CSI index file - pattern: "*.{csi}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@ewels" - - "@maxulysse" diff --git a/modules/nf-core/modules/samtools/merge/main.nf b/modules/nf-core/modules/samtools/merge/main.nf deleted file mode 100644 index be6fe32ebc..0000000000 --- a/modules/nf-core/modules/samtools/merge/main.nf +++ /dev/null @@ -1,41 +0,0 @@ -process SAMTOOLS_MERGE { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" - - input: - tuple val(meta), path(input_files) - path fasta - - output: - tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam - tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def file_type = input_files[0].getExtension() - def reference = fasta ? 
"--reference ${fasta}" : "" - """ - samtools \\ - merge \\ - --threads ${task.cpus-1} \\ - $args \\ - ${reference} \\ - ${prefix}.${file_type} \\ - $input_files - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/samtools/merge/meta.yml b/modules/nf-core/modules/samtools/merge/meta.yml deleted file mode 100644 index 2576a3a355..0000000000 --- a/modules/nf-core/modules/samtools/merge/meta.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: samtools_merge -description: Merge BAM or CRAM file -keywords: - - merge - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ['MIT'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input_files: - type: file - description: BAM/CRAM file - pattern: "*.{bam,cram,sam}" - - fasta: - type: optional file - description: Reference file the CRAM was created with - pattern: "*.{fasta,fa}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - bam: - type: file - description: BAM file - pattern: "*.{bam}" - - cram: - type: file - description: CRAM file - pattern: "*.{cram}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@yuukiiwa " - - "@maxulysse" - - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/samtools/mpileup/main.nf b/modules/nf-core/modules/samtools/mpileup/main.nf deleted file mode 100644 index 77afae60a7..0000000000 --- a/modules/nf-core/modules/samtools/mpileup/main.nf +++ /dev/null @@ -1,35 +0,0 @@ -process SAMTOOLS_MPILEUP { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" - - input: - tuple val(meta), path(bam) - path fasta - - output: - tuple val(meta), path("*.mpileup"), emit: mpileup - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - samtools mpileup \\ - --fasta-ref $fasta \\ - --output ${prefix}.mpileup \\ - $args \\ - $bam - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/samtools/mpileup/meta.yml b/modules/nf-core/modules/samtools/mpileup/meta.yml deleted file mode 100644 index fac7a5bcc2..0000000000 --- a/modules/nf-core/modules/samtools/mpileup/meta.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: samtools_mpileup -description: BAM -keywords: - - mpileup - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and 
post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ['MIT'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - fasta: - type: file - description: FASTA reference file - pattern: "*.{fasta,fa}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - mpileup: - type: file - description: mpileup file - pattern: "*.{mpileup}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@joseespinosa" diff --git a/modules/nf-core/modules/samtools/stats/meta.yml b/modules/nf-core/modules/samtools/stats/meta.yml deleted file mode 100644 index 869e62e373..0000000000 --- a/modules/nf-core/modules/samtools/stats/meta.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: samtools_stats -description: Produces comprehensive statistics from SAM/BAM/CRAM file -keywords: - - statistics - - counts - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ['MIT'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM file from alignment - pattern: "*.{bam,cram}" - - input_index: - type: file - description: BAI/CRAI file from alignment - pattern: "*.{bai,crai}" - - fasta: - type: optional file - description: Reference file the CRAM was created with - pattern: "*.{fasta,fa}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - stats: - type: file - description: File containing samtools stats output - pattern: "*.{stats}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/samtools/view/main.nf b/modules/nf-core/modules/samtools/view/main.nf deleted file mode 100644 index aee21a4eae..0000000000 --- a/modules/nf-core/modules/samtools/view/main.nf +++ /dev/null @@ -1,44 +0,0 @@ -process SAMTOOLS_VIEW { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" - - input: - tuple val(meta), path(input) - path fasta - - output: - tuple val(meta), path("*.bam") , emit: bam , optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def reference = fasta ? "--reference ${fasta} -C" : "" - def file_type = input.getExtension() - if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
- """ - samtools \\ - view \\ - --threads ${task.cpus-1} \\ - ${reference} \\ - $args \\ - $input \\ - $args2 \\ - > ${prefix}.${file_type} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/samtools/view/meta.yml b/modules/nf-core/modules/samtools/view/meta.yml deleted file mode 100644 index 8abf34af69..0000000000 --- a/modules/nf-core/modules/samtools/view/meta.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: samtools_view -description: filter/convert SAM/BAM/CRAM file -keywords: - - view - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ['MIT'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - fasta: - type: optional file - description: Reference file the CRAM was created with - pattern: "*.{fasta,fa}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - bam: - type: file - description: filtered/converted BAM/SAM file - pattern: "*.{bam,sam}" - - cram: - type: file - description: filtered/converted CRAM file - pattern: "*.cram" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@joseespinosa" - - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/seqkit/split2/main.nf b/modules/nf-core/modules/seqkit/split2/main.nf deleted file mode 100644 index ff91d86bd9..0000000000 --- a/modules/nf-core/modules/seqkit/split2/main.nf +++ /dev/null @@ -1,53 +0,0 @@ -process SEQKIT_SPLIT2 { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::seqkit=2.1.0' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/seqkit:2.1.0--h9ee0642_0' : - 'quay.io/biocontainers/seqkit:2.1.0--h9ee0642_0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("**/*.gz"), emit: reads - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - if(meta.single_end){ - """ - seqkit \\ - split2 \\ - $args \\ - --threads $task.cpus \\ - $reads \\ - --out-dir ${prefix} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqkit: \$(echo \$(seqkit 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ - } else { - """ - seqkit \\ - split2 \\ - $args \\ - --threads $task.cpus \\ - --read1 ${reads[0]} \\ - --read2 ${reads[1]} \\ - --out-dir ${prefix} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqkit: \$(echo \$(seqkit 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ - } -} diff --git a/modules/nf-core/modules/seqkit/split2/meta.yml b/modules/nf-core/modules/seqkit/split2/meta.yml deleted file mode 
100644 index 90eec7f9a5..0000000000 --- a/modules/nf-core/modules/seqkit/split2/meta.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: seqkit_split2 -description: Split single or paired-end fastq.gz files -keywords: - - split - - fastq -tools: - - seqkit: - description: | - Cross-platform and ultrafast toolkit for FASTA/Q file manipulation, written by Wei Shen. - homepage: https://github.com/shenwei356/seqkit - documentation: https://bioinf.shenwei.me/seqkit/ - doi: 10.1371/journal.pone.0163962 - licence: ['MIT'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: FastQ files - pattern: "*.{fq.gz/fastq.gz}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: Split fastq files - pattern: "*.{fq.gz/fastq.gz}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/snpeff/Dockerfile b/modules/nf-core/modules/snpeff/Dockerfile deleted file mode 100644 index 608716a4de..0000000000 --- a/modules/nf-core/modules/snpeff/Dockerfile +++ /dev/null @@ -1,22 +0,0 @@ -FROM nfcore/base:1.14 -LABEL \ - author="Maxime Garcia" \ - description="snpEff image for nf-core pipelines" \ - maintainer="maxime.garcia@scilifelab.se" - -# Install the conda environment -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a - -# Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-snpeff-5.0/bin:$PATH - -# Setup default ARG variables -ARG GENOME=GRCh38 -ARG SNPEFF_CACHE_VERSION=99 - -# Download Genome -RUN snpEff download -v ${GENOME}.${SNPEFF_CACHE_VERSION} - -# Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-snpeff-5.0 > 
nf-core-snpeff-5.0.yml diff --git a/modules/nf-core/modules/snpeff/build.sh b/modules/nf-core/modules/snpeff/build.sh deleted file mode 100644 index b94ffd6905..0000000000 --- a/modules/nf-core/modules/snpeff/build.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -# Build and push all containers - -build_push() { - GENOME=$1 - SNPEFF_CACHE_VERSION=$2 - SNPEFF_TAG=$3 - - docker build \ - -t nfcore/snpeff:${SNPEFF_TAG}.${GENOME} \ - software/snpeff/. \ - --build-arg GENOME=${GENOME} \ - --build-arg SNPEFF_CACHE_VERSION=${SNPEFF_CACHE_VERSION} - - docker push nfcore/snpeff:${SNPEFF_TAG}.${GENOME} -} - -build_push "GRCh37" "75" "5.0" -build_push "GRCh38" "99" "5.0" -build_push "GRCm38" "99" "5.0" -build_push "CanFam3.1" "99" "5.0" -build_push "WBcel235" "99" "5.0" diff --git a/modules/nf-core/modules/snpeff/environment.yml b/modules/nf-core/modules/snpeff/environment.yml deleted file mode 100644 index ad0523fbd3..0000000000 --- a/modules/nf-core/modules/snpeff/environment.yml +++ /dev/null @@ -1,10 +0,0 @@ -# You can use this file to create a conda environment for this module: -# conda env create -f environment.yml -name: nf-core-snpeff-5.0 -channels: - - conda-forge - - bioconda - - defaults - -dependencies: - - bioconda::snpeff=5.0 diff --git a/modules/nf-core/modules/snpeff/main.nf b/modules/nf-core/modules/snpeff/main.nf deleted file mode 100644 index 1b4d5f4309..0000000000 --- a/modules/nf-core/modules/snpeff/main.nf +++ /dev/null @@ -1,48 +0,0 @@ -process SNPEFF { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::snpeff=5.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/snpeff:5.0--hdfd78af_1' : - 'quay.io/biocontainers/snpeff:5.0--hdfd78af_1' }" - - input: - tuple val(meta), path(vcf) - val db - path cache - - output: - tuple val(meta), path("*.ann.vcf"), emit: vcf - path "*.csv" , emit: report - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def avail_mem = 6 - if (!task.memory) { - log.info '[snpEff] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.prefix ?: "${meta.id}" - def cache_command = cache ? "-dataDir \${PWD}/${cache}" : "" - """ - snpEff \\ - -Xmx${avail_mem}g \\ - $db \\ - $args \\ - -csvStats ${prefix}.csv \\ - $cache_command \\ - $vcf \\ - > ${prefix}.ann.vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/snpeff/meta.yml b/modules/nf-core/modules/snpeff/meta.yml deleted file mode 100644 index 8ba1868364..0000000000 --- a/modules/nf-core/modules/snpeff/meta.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: snpEff -description: Genetic variant annotation and functional effect prediction toolbox -keywords: - - annotation -tools: - - snpeff: - description: | - SnpEff is a variant annotation and effect prediction tool. - It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). 
- homepage: https://pcingola.github.io/SnpEff/ - documentation: https://pcingola.github.io/SnpEff/se_introduction/ - licence: ['MIT'] -params: - - use_cache: - type: boolean - description: | - boolean to enable the usage of containers with cache - Enable the usage of containers with cache - Does not work with conda - - snpeff_tag: - type: value - description: | - Specify the tag for the container - https://hub.docker.com/r/nfcore/snpeff/tags -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - vcf: - type: file - description: | - vcf to annotate - - db: - type: value - description: | - which db to annotate with - - cache: - type: file - description: | - path to snpEff cache (optional) -output: - - vcf: - type: file - description: | - annotated vcf - pattern: "*.ann.vcf" - - report: - type: file - description: snpEff report file - pattern: "*.html" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@maxulysse" diff --git a/modules/nf-core/modules/strelka/germline/main.nf b/modules/nf-core/modules/strelka/germline/main.nf deleted file mode 100644 index 3f47d86f11..0000000000 --- a/modules/nf-core/modules/strelka/germline/main.nf +++ /dev/null @@ -1,48 +0,0 @@ -process STRELKA_GERMLINE { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? "bioconda::strelka=2.9.10" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/strelka:2.9.10--0' : - 'quay.io/biocontainers/strelka:2.9.10--0' }" - - input: - tuple val(meta), path(input), path(input_index), path (target_bed), path (target_bed_tbi) - path fasta - path fai - - output: - tuple val(meta), path("*variants.vcf.gz") , emit: vcf - tuple val(meta), path("*variants.vcf.gz.tbi"), emit: vcf_tbi - tuple val(meta), path("*genome.vcf.gz") , emit: genome_vcf - tuple val(meta), path("*genome.vcf.gz.tbi") , emit: genome_vcf_tbi - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def regions = target_bed ? "--callRegions ${target_bed}" : "" - """ - configureStrelkaGermlineWorkflow.py \\ - --bam $input \\ - --referenceFasta $fasta \\ - $regions \\ - $args \\ - --runDir strelka - - python strelka/runWorkflow.py -m local -j $task.cpus - mv strelka/results/variants/genome.*.vcf.gz ${prefix}.genome.vcf.gz - mv strelka/results/variants/genome.*.vcf.gz.tbi ${prefix}.genome.vcf.gz.tbi - mv strelka/results/variants/variants.vcf.gz ${prefix}.variants.vcf.gz - mv strelka/results/variants/variants.vcf.gz.tbi ${prefix}.variants.vcf.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - strelka: \$( configureStrelkaGermlineWorkflow.py --version ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/strelka/germline/meta.yml b/modules/nf-core/modules/strelka/germline/meta.yml deleted file mode 100644 index 2eeb0f8fa8..0000000000 --- a/modules/nf-core/modules/strelka/germline/meta.yml +++ /dev/null @@ -1,63 +0,0 @@ -name: strelka_germline -description: Strelka2 is a fast and accurate small variant caller optimized for analysis of germline variation -keywords: - - variantcalling - - germline - - wgs - - vcf - - variants -tools: - - strelka: - description: Strelka calls somatic and germline small variants from mapped sequencing reads - homepage: 
https://github.com/Illumina/strelka - documentation: https://github.com/Illumina/strelka/blob/v2.9.x/docs/userGuide/README.md - tool_dev_url: https://github.com/Illumina/strelka - doi: 10.1038/s41592-018-0051-x - licence: ['GPL v3'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test'] - - input: - type: file - description: BAM/CRAM file - pattern: "*.{bam,cram}" - - input_index: - type: file - description: BAM/CRAI index file - pattern: "*.{bai,crai}" - - target_bed: - type: file - description: An optional bed file - pattern: "*.{bed}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test'] - - vcf: - type: file - description: gzipped germline variant file - pattern: "*.{vcf.gz}" - - vcf_tbi: - type: file - description: index file for the vcf file - pattern: "*.vcf.gz.tbi" - - genome_vcf: - type: file - description: variant records and compressed non-variant blocks - pattern: "*_genome.vcf.gz" - - genome_vcf_tbi: - type: file - description: index file for the genome_vcf file - pattern: "*_genome.vcf.gz.tbi" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@arontommi" diff --git a/modules/nf-core/modules/strelka/somatic/main.nf b/modules/nf-core/modules/strelka/somatic/main.nf deleted file mode 100644 index b61c09bd8c..0000000000 --- a/modules/nf-core/modules/strelka/somatic/main.nf +++ /dev/null @@ -1,53 +0,0 @@ -process STRELKA_SOMATIC { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? "bioconda::strelka=2.9.10" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/strelka:2.9.10--h9ee0642_1' : - 'quay.io/biocontainers/strelka:2.9.10--h9ee0642_1' }" - - input: - tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor), path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi), path(target_bed), path(target_bed_index) - path fasta - path fai - - output: - tuple val(meta), path("*.somatic_indels.vcf.gz") , emit: vcf_indels - tuple val(meta), path("*.somatic_indels.vcf.gz.tbi"), emit: vcf_indels_tbi - tuple val(meta), path("*.somatic_snvs.vcf.gz") , emit: vcf_snvs - tuple val(meta), path("*.somatic_snvs.vcf.gz.tbi") , emit: vcf_snvs_tbi - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def options_target_bed = target_bed ? "--callRegions ${target_bed}" : "" - def options_manta = manta_candidate_small_indels ? 
"--indelCandidates ${manta_candidate_small_indels}" : "" - """ - - configureStrelkaSomaticWorkflow.py \\ - --tumor $input_tumor \\ - --normal $input_normal \\ - --referenceFasta $fasta \\ - ${options_target_bed} \\ - ${options_manta} \\ - $args \\ - --runDir strelka - - python strelka/runWorkflow.py -m local -j $task.cpus - - mv strelka/results/variants/somatic.indels.vcf.gz ${prefix}.somatic_indels.vcf.gz - mv strelka/results/variants/somatic.indels.vcf.gz.tbi ${prefix}.somatic_indels.vcf.gz.tbi - mv strelka/results/variants/somatic.snvs.vcf.gz ${prefix}.somatic_snvs.vcf.gz - mv strelka/results/variants/somatic.snvs.vcf.gz.tbi ${prefix}.somatic_snvs.vcf.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - strelka: \$( configureStrelkaSomaticWorkflow.py --version ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/strelka/somatic/meta.yml b/modules/nf-core/modules/strelka/somatic/meta.yml deleted file mode 100644 index 076c103611..0000000000 --- a/modules/nf-core/modules/strelka/somatic/meta.yml +++ /dev/null @@ -1,93 +0,0 @@ -name: strelka_somatic -description: Strelka2 is a fast and accurate small variant caller optimized for analysis of germline variation in small cohorts and somatic variation in tumor/normal sample pairs -keywords: - - variant calling - - germline - - wgs - - vcf - - variants -tools: - - strelka: - description: Strelka calls somatic and germline small variants from mapped sequencing reads - homepage: https://github.com/Illumina/strelka - documentation: https://github.com/Illumina/strelka/blob/v2.9.x/docs/userGuide/README.md - tool_dev_url: https://github.com/Illumina/strelka - doi: 10.1038/s41592-018-0051-x - licence: ['GPL v3'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - input_normal: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - input_index_normal: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - input_tumor: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - input_index_tumor: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - manta_candidate_small_indels: - type: file - description: VCF.gz file - pattern: "*.{vcf.gz}" - - manta_candidate_small_indels_tbi: - type: file - description: VCF.gz index file - pattern: "*.tbi" - - fasta: - type: file - description: Genome reference FASTA file - pattern: "*.{fa,fasta}" - - fai: - type: file - description: Genome reference FASTA index file - pattern: "*.{fa.fai,fasta.fai}" - - target_bed: - type: file - description: BED file containing target regions for variant calling - pattern: "*.{bed}" - - target_bed_tbi: - type: file - description: Index for BED file containing target regions for variant calling - pattern: "*.{bed.tbi}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - vcf_indels: - type: file - description: Gzipped VCF file containing variants - pattern: "*.{vcf.gz}" - - vcf_indels_tbi: - type: file - description: Index for gzipped VCF file containing variants - pattern: "*.{vcf.gz.tbi}" - - vcf_snvs: - type: file - description: Gzipped VCF file containing variants - pattern: "*.{vcf.gz}" - - vcf_snvs_tbi: - type: file - description: Index for gzipped VCF file containing variants - pattern: "*.{vcf.gz.tbi}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@drpatelh" diff --git a/modules/nf-core/modules/tabix/bgziptabix/main.nf b/modules/nf-core/modules/tabix/bgziptabix/main.nf deleted file mode 100644 index f0fe1e5c98..0000000000 --- a/modules/nf-core/modules/tabix/bgziptabix/main.nf +++ /dev/null @@ -1,30 +0,0 @@ -process TABIX_BGZIPTABIX { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::tabix=1.11' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : - 'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }" - - input: - tuple val(meta), path(input) - - output: - tuple val(meta), path("*.gz"), path("*.tbi"), emit: gz_tbi - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - bgzip --threads ${task.cpus} -c $args $input > ${prefix}.gz - tabix $args2 ${prefix}.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/tabix/bgziptabix/meta.yml b/modules/nf-core/modules/tabix/bgziptabix/meta.yml deleted file mode 100644 index f2aed84d13..0000000000 --- a/modules/nf-core/modules/tabix/bgziptabix/meta.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: tabix_bgziptabix -description: bgzip a sorted tab-delimited genome file and then create tabix index -keywords: - - bgzip - - compress - - index - - tabix - - vcf -tools: - - tabix: - description: Generic indexer for TAB-delimited genome position files. - homepage: https://www.htslib.org/doc/tabix.html - documentation: https://www.htslib.org/doc/tabix.1.html - doi: 10.1093/bioinformatics/btq671 - licence: ['MIT'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - tab: - type: file - description: TAB-delimited genome position file - pattern: "*.{bed,gff,sam,vcf}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - gz: - type: file - description: Output compressed file - pattern: "*.{gz}" - - tbi: - type: file - description: tabix index file - pattern: "*.{gz.tbi}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@maxulysse" diff --git a/modules/nf-core/modules/tabix/tabix/main.nf b/modules/nf-core/modules/tabix/tabix/main.nf deleted file mode 100644 index 5f51626146..0000000000 --- a/modules/nf-core/modules/tabix/tabix/main.nf +++ /dev/null @@ -1,30 +0,0 @@ -process TABIX_TABIX { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::tabix=1.11' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : - 'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }" - - input: - tuple val(meta), path(tab) - - output: - tuple val(meta), path("*.tbi"), emit: tbi - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - tabix $args $tab - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/tabix/tabix/meta.yml b/modules/nf-core/modules/tabix/tabix/meta.yml deleted file mode 100644 index 2e37c4ff90..0000000000 --- a/modules/nf-core/modules/tabix/tabix/meta.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: tabix_tabix -description: create tabix index from a sorted bgzip tab-delimited genome file -keywords: - - index - - tabix - - vcf -tools: - - tabix: - description: Generic indexer for TAB-delimited genome position files. 
- homepage: https://www.htslib.org/doc/tabix.html - documentation: https://www.htslib.org/doc/tabix.1.html - doi: 10.1093/bioinformatics/btq671 - licence: ['MIT'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - tab: - type: file - description: TAB-delimited genome position file compressed with bgzip - pattern: "*.{bed.gz,gff.gz,sam.gz,vcf.gz}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - tbi: - type: file - description: tabix index file - pattern: "*.{tbi}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@joseespinosa" - - "@drpatelh" - - "@maxulysse" diff --git a/modules/nf-core/modules/tiddit/sv/main.nf b/modules/nf-core/modules/tiddit/sv/main.nf deleted file mode 100644 index 454dfc54ce..0000000000 --- a/modules/nf-core/modules/tiddit/sv/main.nf +++ /dev/null @@ -1,41 +0,0 @@ -process TIDDIT_SV { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::tiddit=2.12.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/tiddit:2.12.1--py38h1773678_0' : - 'quay.io/biocontainers/tiddit:2.12.1--py38h1773678_0' }" - - input: - tuple val(meta), path(bam) - path fasta - path fai - - output: - tuple val(meta), path("*.vcf") , emit: vcf - tuple val(meta), path("*.ploidy.tab") , emit: ploidy - tuple val(meta), path("*.signals.tab"), emit: signals - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def reference = fasta == "dummy_file.txt" ? 
"--ref $fasta" : "" - """ - tiddit \\ - --sv \\ - $args \\ - --bam $bam \\ - $reference \\ - -o $prefix - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*TIDDIT-//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/tiddit/sv/meta.yml b/modules/nf-core/modules/tiddit/sv/meta.yml deleted file mode 100644 index f788ffa66a..0000000000 --- a/modules/nf-core/modules/tiddit/sv/meta.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: tiddit_sv -description: Identify chromosomal rearrangements. -keywords: - - structural - - variants - - vcf -tools: - - sv: - description: Search for structural variants. - homepage: https://github.com/SciLifeLab/TIDDIT - documentation: https://github.com/SciLifeLab/TIDDIT/blob/master/README.md - doi: 10.12688/f1000research.11168.1 - licence: ['GPL-3.0-or-later'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Input FASTA file - pattern: "*.{fasta,fa}" - - fai: - type: file - description: FASTA index file - pattern: "*.{fai}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - vcf: - type: file - description: vcf - pattern: "*.{vcf}" - - ploidy: - type: file - description: tab - pattern: "*.{ploidy.tab}" - - signals: - type: file - description: tab - pattern: "*.{signals.tab}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@maxulysse" diff --git a/modules/nf-core/modules/trimgalore/main.nf b/modules/nf-core/modules/trimgalore/main.nf deleted file mode 100644 index 9487c7990b..0000000000 --- a/modules/nf-core/modules/trimgalore/main.nf +++ /dev/null @@ -1,83 +0,0 @@ -process TRIMGALORE { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? 
'bioconda::trim-galore=0.6.7' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/trim-galore:0.6.7--hdfd78af_0' : - 'quay.io/biocontainers/trim-galore:0.6.7--hdfd78af_0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.fq.gz") , emit: reads - tuple val(meta), path("*report.txt"), emit: log - path "versions.yml" , emit: versions - - tuple val(meta), path("*.html"), emit: html optional true - tuple val(meta), path("*.zip") , emit: zip optional true - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Calculate number of --cores for TrimGalore based on value of task.cpus - // See: https://github.com/FelixKrueger/TrimGalore/blob/master/Changelog.md#version-060-release-on-1-mar-2019 - // See: https://github.com/nf-core/atacseq/pull/65 - def cores = 1 - if (task.cpus) { - cores = (task.cpus as int) - 4 - if (meta.single_end) cores = (task.cpus as int) - 3 - if (cores < 1) cores = 1 - if (cores > 4) cores = 4 - } - - // Clipping presets have to be evaluated in the context of SE/PE - def c_r1 = params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : '' - def c_r2 = params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : '' - def tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '' - def tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' - - // Added soft-links to original fastqs for consistent naming in MultiQC - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - [ ! 
-f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - trim_galore \\ - $args \\ - --cores $cores \\ - --gzip \\ - $c_r1 \\ - $tpc_r1 \\ - ${prefix}.fastq.gz - cat <<-END_VERSIONS > versions.yml - "${task.process}": - trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') - cutadapt: \$(cutadapt --version) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - trim_galore \\ - $args \\ - --cores $cores \\ - --paired \\ - --gzip \\ - $c_r1 \\ - $c_r2 \\ - $tpc_r1 \\ - $tpc_r2 \\ - ${prefix}_1.fastq.gz \\ - ${prefix}_2.fastq.gz - cat <<-END_VERSIONS > versions.yml - "${task.process}": - trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') - cutadapt: \$(cutadapt --version) - END_VERSIONS - """ - } -} diff --git a/modules/nf-core/modules/trimgalore/meta.yml b/modules/nf-core/modules/trimgalore/meta.yml deleted file mode 100644 index c7e1df1de7..0000000000 --- a/modules/nf-core/modules/trimgalore/meta.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: trimgalore -description: Trim FastQ files using Trim Galore! -keywords: - - trimming - - adapters - - sequencing adapters - - fastq -tools: - - trimgalore: - description: | - A wrapper tool around Cutadapt and FastQC to consistently apply quality - and adapter trimming to FastQ files, with some extra functionality for - MspI-digested RRBS-type (Reduced Representation Bisufite-Seq) libraries. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/ - documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md - licence: ['GPL-3.0-or-later'] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input adapter trimmed FastQ files of size 1 and 2 for - single-end and paired-end data, respectively. - pattern: "*.{fq.gz}" - - html: - type: file - description: FastQC report (optional) - pattern: "*_{fastqc.html}" - - zip: - type: file - description: FastQC report archive (optional) - pattern: "*_{fastqc.zip}" - - log: - type: file - description: Trim Galore! trimming report - pattern: "*_{report.txt}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@ewels" - - "@FelixKrueger" diff --git a/modules/nf-core/modules/vcftools/meta.yml b/modules/nf-core/modules/vcftools/meta.yml deleted file mode 100644 index a8f864a9c8..0000000000 --- a/modules/nf-core/modules/vcftools/meta.yml +++ /dev/null @@ -1,294 +0,0 @@ -name: vcftools -description: A set of tools written in Perl and C++ for working with VCF files -keywords: VCF - - sort -tools: - - vcftools: - description: A set of tools written in Perl and C++ for working with VCF files. This package only contains the C++ libraries whereas the package perl-vcftools-vcf contains the perl libraries - homepage: http://vcftools.sourceforge.net/ - documentation: http://vcftools.sourceforge.net/man_latest.html - tool_dev_url: None - doi: - licence: ['LGPL'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - variant_file: - type: file - description: variant input file which can be vcf, vcf.gz, or bcf format. 
- - bed: - type: file - description: bed file which can be used with different arguments in vcftools (optional) - - diff_variant_file: - type: file - description: secondary variant file which can be used with the 'diff' suite of tools (optional) - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - vcf: - type: file - description: vcf file (optional) - pattern: "*.vcf" - - bcf: - type: file - description: bcf file (optional) - pattern: "*.bcf" - - frq: - type: file - description: Allele frequency for each site (optional) - pattern: "*.frq" - - frq_count: - type: file - description: Allele counts for each site (optional) - pattern: "*.frq.count" - - idepth: - type: file - description: mean depth per individual (optional) - pattern: "*.idepth" - - ldepth: - type: file - description: depth per site summed across individuals (optional) - pattern: "*.ildepth" - - ldepth_mean: - type: file - description: mean depth per site calculated across individuals (optional) - pattern: "*.ldepth.mean" - - gdepth: - type: file - description: depth for each genotype in vcf file (optional) - pattern: "*.gdepth" - - hap_ld: - type: file - description: r2, D, and D’ statistics using phased haplotypes (optional) - pattern: "*.hap.ld" - - geno_ld: - type: file - description: squared correlation coefficient between genotypes encoded as 0, 1 and 2 to represent the number of non-reference alleles in each individual (optional) - pattern: "*.geno.ld" - - geno_chisq: - type: file - description: test for genotype independence via the chi-squared statistic (optional) - pattern: "*.geno.chisq" - - list_hap_ld: - type: file - description: r2 statistics of the sites contained in the provided input file verses all other sites (optional) - pattern: "*.list.hap.ld" - - list_geno_ld: - type: file - description: r2 
statistics of the sites contained in the provided input file verses all other sites (optional) - pattern: "*.list.geno.ld" - - interchrom_hap_ld: - type: file - description: r2 statistics for sites (haplotypes) on different chromosomes (optional) - pattern: "*.interchrom.hap.ld" - - interchrom_geno_ld: - type: file - description: r2 statistics for sites (genotypes) on different chromosomes (optional) - pattern: "*.interchrom.geno.ld" - - tstv: - type: file - description: Transition / Transversion ratio in bins of size defined in options (optional) - pattern: "*.TsTv" - - tstv_summary: - type: file - description: Summary of all Transitions and Transversions (optional) - pattern: "*.TsTv.summary" - - tstv_count: - type: file - description: Transition / Transversion ratio as a function of alternative allele count (optional) - pattern: "*.TsTv.count" - - tstv_qual: - type: file - description: Transition / Transversion ratio as a function of SNP quality threshold (optional) - pattern: "*.TsTv.qual" - - filter_summary: - type: file - description: Summary of the number of SNPs and Ts/Tv ratio for each FILTER category (optional) - pattern: "*.FILTER.summary" - - sites_pi: - type: file - description: Nucleotide divergency on a per-site basis (optional) - pattern: "*.sites.pi" - - windowed_pi: - type: file - description: Nucleotide diversity in windows, with window size determined by options (optional) - pattern: "*windowed.pi" - - weir_fst: - type: file - description: Fst estimate from Weir and Cockerham’s 1984 paper (optional) - pattern: "*.weir.fst" - - heterozygosity: - type: file - description: Heterozygosity on a per-individual basis (optional) - pattern: "*.het" - - hwe: - type: file - description: Contains the Observed numbers of Homozygotes and Heterozygotes and the corresponding Expected numbers under HWE (optional) - pattern: "*.hwe" - - tajima_d: - type: file - description: Tajima’s D statistic in bins with size of the specified number in options (optional) - 
pattern: "*.Tajima.D" - - freq_burden: - type: file - description: Number of variants within each individual of a specific frequency in options (optional) - pattern: "*.ifreqburden" - - lroh: - type: file - description: Long Runs of Homozygosity (optional) - pattern: "*.LROH" - - relatedness: - type: file - description: Relatedness statistic based on the method of Yang et al, Nature Genetics 2010 (doi:10.1038/ng.608) (optional) - pattern: "*.relatedness" - - relatedness2: - type: file - description: Relatedness statistic based on the method of Manichaikul et al., BIOINFORMATICS 2010 (doi:10.1093/bioinformatics/btq559) (optional) - pattern: "*.relatedness2" - - lqual: - type: file - description: per-site SNP quality (optional) - pattern: "*.lqual" - - missing_individual: - type: file - description: Missingness on a per-individual basis (optional) - pattern: "*.imiss" - - missing_site: - type: file - description: Missingness on a per-site basis (optional) - pattern: "*.lmiss" - - snp_density: - type: file - description: Number and density of SNPs in bins of size defined by option (optional) - pattern: "*.snpden" - - kept_sites: - type: file - description: All sites that have been kept after filtering (optional) - pattern: "*.kept.sites" - - removed_sites: - type: file - description: All sites that have been removed after filtering (optional) - pattern: "*.removed.sites" - - singeltons: - type: file - description: Location of singletons, and the individual they occur in (optional) - pattern: "*.singeltons" - - indel_hist: - type: file - description: Histogram file of the length of all indels (including SNPs) (optional) - pattern: "*.indel_hist" - - hapcount: - type: file - description: Unique haplotypes within user specified bins (optional) - pattern: "*.hapcount" - - mendel: - type: file - description: Mendel errors identified in trios (optional) - pattern: "*.mendel" - - format: - type: file - description: Extracted information from the genotype fields in the VCF 
file relating to a specfied FORMAT identifier (optional) - pattern: "*.FORMAT" - - info: - type: file - description: Extracted information from the INFO field in the VCF file (optional) - pattern: "*.INFO" - - genotypes_matrix: - type: file - description: | - Genotypes output as large matrix. - Genotypes of each individual on a separate line. - Genotypes are represented as 0, 1 and 2, where the number represent that number of non-reference alleles. - Missing genotypes are represented by -1 (optional) - pattern: "*.012" - - genotypes_matrix_individual: - type: file - description: Details the individuals included in the main genotypes_matrix file (optional) - pattern: "*.012.indv" - - genotypes_matrix_position: - type: file - description: Details the site locations included in the main genotypes_matrix file (optional) - pattern: "*.012.pos" - - impute_hap: - type: file - description: Phased haplotypes in IMPUTE reference-panel format (optional) - pattern: "*.impute.hap" - - impute_hap_legend: - type: file - description: Impute haplotype legend file (optional) - pattern: "*.impute.hap.legend" - - impute_hap_indv: - type: file - description: Impute haplotype individuals file (optional) - pattern: "*.impute.hap.indv" - - ldhat_sites: - type: file - description: Output data in LDhat format, sites (optional) - pattern: "*.ldhat.sites" - - ldhat_locs: - type: file - description: output data in LDhat format, locations (optional) - pattern: "*.ldhat.locs" - - beagle_gl: - type: file - description: Genotype likelihoods for biallelic sites (optional) - pattern: "*.BEAGLE.GL" - - beagle_pl: - type: file - description: Genotype likelihoods for biallelic sites (optional) - pattern: "*.BEAGLE.PL" - - ped: - type: file - description: output the genotype data in PLINK PED format (optional) - pattern: "*.ped" - - map_: - type: file - description: output the genotype data in PLINK PED format (optional) - pattern: "*.map" - - tped: - type: file - description: output the genotype data 
in PLINK PED format (optional) - pattern: "*.tped" - - tfam: - type: file - description: output the genotype data in PLINK PED format (optional) - pattern: "*.tfam" - - diff_sites_in_files: - type: file - description: Sites that are common / unique to each file specified in optional inputs (optional) - pattern: "*.diff.sites.in.files" - - diff_indv_in_files: - type: file - description: Individuals that are common / unique to each file specified in optional inputs (optional) - pattern: "*.diff.indv.in.files" - - diff_sites: - type: file - description: Discordance on a site by site basis, specified in optional inputs (optional) - pattern: "*.diff.sites" - - diff_indv: - type: file - description: Discordance on a individual by individual basis, specified in optional inputs (optional) - pattern: "*.diff.indv" - - diff_discd_matrix: - type: file - description: Discordance matrix between files specified in optional inputs (optional) - pattern: "*.diff.discordance.matrix" - - diff_switch_error: - type: file - description: Switch errors found between sites (optional) - pattern: "*.diff.switch" - -authors: - - "@Mark-S-Hill" diff --git a/modules/nf-core/mosdepth/environment.yml b/modules/nf-core/mosdepth/environment.yml new file mode 100644 index 0000000000..f871e054e4 --- /dev/null +++ b/modules/nf-core/mosdepth/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/mosdepth + - mosdepth=0.3.10 diff --git a/modules/nf-core/mosdepth/main.nf b/modules/nf-core/mosdepth/main.nf new file mode 100644 index 0000000000..3bf945f909 --- /dev/null +++ b/modules/nf-core/mosdepth/main.nf @@ -0,0 +1,80 @@ +process MOSDEPTH { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mosdepth:0.3.10--h4e814b3_1' : + 'biocontainers/mosdepth:0.3.10--h4e814b3_1'}" + + input: + tuple val(meta), path(bam), path(bai), path(bed) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path('*.global.dist.txt') , emit: global_txt + tuple val(meta), path('*.summary.txt') , emit: summary_txt + tuple val(meta), path('*.region.dist.txt') , optional:true, emit: regions_txt + tuple val(meta), path('*.per-base.d4') , optional:true, emit: per_base_d4 + tuple val(meta), path('*.per-base.bed.gz') , optional:true, emit: per_base_bed + tuple val(meta), path('*.per-base.bed.gz.csi') , optional:true, emit: per_base_csi + tuple val(meta), path('*.regions.bed.gz') , optional:true, emit: regions_bed + tuple val(meta), path('*.regions.bed.gz.csi') , optional:true, emit: regions_csi + tuple val(meta), path('*.quantized.bed.gz') , optional:true, emit: quantized_bed + tuple val(meta), path('*.quantized.bed.gz.csi') , optional:true, emit: quantized_csi + tuple val(meta), path('*.thresholds.bed.gz') , optional:true, emit: thresholds_bed + tuple val(meta), path('*.thresholds.bed.gz.csi'), optional:true, emit: thresholds_csi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--fasta ${fasta}" : "" + def interval = bed ? "--by ${bed}" : "" + if (bed && args.contains("--by")) { + error "'--by' can only be specified once when running mosdepth! 
Either remove input BED file definition or remove '--by' from 'ext.args' definition" + } + if (!bed && args.contains("--thresholds")) { + error "'--thresholds' can only be specified in conjunction with '--by'" + } + + """ + mosdepth \\ + --threads $task.cpus \\ + $interval \\ + $reference \\ + $args \\ + $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.global.dist.txt + touch ${prefix}.region.dist.txt + touch ${prefix}.summary.txt + touch ${prefix}.per-base.d4 + echo "" | gzip > ${prefix}.per-base.bed.gz + touch ${prefix}.per-base.bed.gz.csi + echo "" | gzip > ${prefix}.regions.bed.gz + touch ${prefix}.regions.bed.gz.csi + echo "" | gzip > ${prefix}.quantized.bed.gz + touch ${prefix}.quantized.bed.gz.csi + echo "" | gzip > ${prefix}.thresholds.bed.gz + touch ${prefix}.thresholds.bed.gz.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/mosdepth/meta.yml b/modules/nf-core/mosdepth/meta.yml new file mode 100644 index 0000000000..dc783c9006 --- /dev/null +++ b/modules/nf-core/mosdepth/meta.yml @@ -0,0 +1,179 @@ +name: mosdepth +description: Calculates genome-wide sequencing coverage. +keywords: + - mosdepth + - bam + - cram + - coverage +tools: + - mosdepth: + description: | + Fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. + documentation: https://github.com/brentp/mosdepth + doi: 10.1093/bioinformatics/btx699 + licence: ["MIT"] + identifier: biotools:mosdepth +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Input BAM/CRAM file + pattern: "*.{bam,cram}" + - bai: + type: file + description: Index for BAM/CRAM file + pattern: "*.{bai,crai}" + - bed: + type: file + description: BED file with intersected intervals + pattern: "*.{bed}" + - - meta2: + type: map + description: | + Groovy Map containing reference genome information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta}" +output: + - global_txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.global.dist.txt": + type: file + description: Text file with global cumulative coverage distribution + pattern: "*.{global.dist.txt}" + - summary_txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.summary.txt": + type: file + description: Text file with summary mean depths per chromosome and regions + pattern: "*.{summary.txt}" + - regions_txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.region.dist.txt": + type: file + description: Text file with region cumulative coverage distribution + pattern: "*.{region.dist.txt}" + - per_base_d4: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.per-base.d4": + type: file + description: D4 file with per-base coverage + pattern: "*.{per-base.d4}" + - per_base_bed: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.per-base.bed.gz": + type: file + description: BED file with per-base coverage + pattern: "*.{per-base.bed.gz}" + - per_base_csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.per-base.bed.gz.csi": + type: file + description: Index file for BED file with per-base coverage + pattern: "*.{per-base.bed.gz.csi}" + - regions_bed: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.regions.bed.gz": + type: file + description: BED file with per-region coverage + pattern: "*.{regions.bed.gz}" + - regions_csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.regions.bed.gz.csi": + type: file + description: Index file for BED file with per-region coverage + pattern: "*.{regions.bed.gz.csi}" + - quantized_bed: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.quantized.bed.gz": + type: file + description: BED file with binned coverage + pattern: "*.{quantized.bed.gz}" + - quantized_csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.quantized.bed.gz.csi": + type: file + description: Index file for BED file with binned coverage + pattern: "*.{quantized.bed.gz.csi}" + - thresholds_bed: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.thresholds.bed.gz": + type: file + description: BED file with the number of bases in each region that are covered + at or above each threshold + pattern: "*.{thresholds.bed.gz}" + - thresholds_csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.thresholds.bed.gz.csi": + type: file + description: Index file for BED file with threshold coverage + pattern: "*.{thresholds.bed.gz.csi}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@ramprasadn" + - "@matthdsm" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@ramprasadn" + - "@matthdsm" diff --git a/modules/nf-core/msisensor2/msi/environment.yml b/modules/nf-core/msisensor2/msi/environment.yml new file mode 100644 index 0000000000..4061123fb9 --- /dev/null +++ b/modules/nf-core/msisensor2/msi/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::msisensor2=0.1 diff --git a/modules/nf-core/msisensor2/msi/main.nf b/modules/nf-core/msisensor2/msi/main.nf new file mode 100644 index 0000000000..a15e2f66e2 --- /dev/null +++ b/modules/nf-core/msisensor2/msi/main.nf @@ -0,0 +1,52 @@ +process MSISENSOR2_MSI { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/msisensor2:0.1--hd03093a_0' : + 'biocontainers/msisensor2:0.1--hd03093a_0'}" + + input: + tuple val(meta), path(tumor_bam), path(tumor_bam_index) + tuple val(meta2), path(models) + + output: + tuple val(meta), path("${prefix}"), emit: msi + tuple val(meta), path("${prefix}_dis"), emit: distribution + tuple val(meta), path("${prefix}_somatic"), emit: somatic + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + msisensor2 msi \\ + -b ${task.cpus} \\ + ${args} \\ + -M ${models} \\ + -t ${tumor_bam} \\ + -o ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + msisensor2: \$(echo \$(msisensor2 2> >(grep Version) | sed 's/Version: v//g')) + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix} + touch ${prefix}_dis + touch ${prefix}_somatic + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + msisensor2: \$(echo \$(msisensor2 2> >(grep Version) | sed 's/Version: v//g')) + END_VERSIONS + """ +} \ No newline at end of file diff --git a/modules/nf-core/msisensor2/msi/meta.yml b/modules/nf-core/msisensor2/msi/meta.yml new file mode 100644 index 0000000000..9cece656ac --- /dev/null +++ b/modules/nf-core/msisensor2/msi/meta.yml @@ -0,0 +1,89 @@ +name: "msisensor2_msi" +description: msisensor2 detection of MSI regions. +keywords: + - msi + - microsatellite + - microsatellite instability + - tumor + - cfDNA +tools: + - "msisensor2": + description: "MSIsensor2 is a novel algorithm based machine learning, featuring + a large upgrade in the microsatellite instability (MSI) detection for tumor + only sequencing data, including Cell-Free DNA (cfDNA), Formalin-Fixed Paraffin-Embedded(FFPE) + and other sample types. The original MSIsensor is specially designed for tumor/normal + paired sequencing data." 
+ homepage: "https://github.com/niu-lab/msisensor2" + documentation: "https://github.com/niu-lab/msisensor2/blob/master/README.md" + tool_dev_url: "https://github.com/niu-lab/msisensor2" + license: ["GPL-3.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tumor_bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - tumor_bam_index: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - models: + type: file + description: Folder of MSISensor2 models (available from Github or as a + product of msisensor2/scan) + pattern: "*/*" + ontologies: [] +output: + msi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}: + type: file + description: MSI classifications as a text file + ontologies: [] + distribution: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}_dis: + type: file + description: Read count distributions of MSI regions + ontologies: [] + somatic: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}_somatic: + type: file + description: Somatic MSI regions detected. 
+ ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" diff --git a/modules/nf-core/msisensorpro/msisomatic/environment.yml b/modules/nf-core/msisensorpro/msisomatic/environment.yml new file mode 100644 index 0000000000..3b66afc67e --- /dev/null +++ b/modules/nf-core/msisensorpro/msisomatic/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::msisensor-pro=1.3.0 diff --git a/modules/nf-core/modules/msisensorpro/msi_somatic/main.nf b/modules/nf-core/msisensorpro/msisomatic/main.nf similarity index 54% rename from modules/nf-core/modules/msisensorpro/msi_somatic/main.nf rename to modules/nf-core/msisensorpro/msisomatic/main.nf index e2da70de7c..41e8a04cfb 100644 --- a/modules/nf-core/modules/msisensorpro/msi_somatic/main.nf +++ b/modules/nf-core/msisensorpro/msisomatic/main.nf @@ -1,22 +1,22 @@ -process MSISENSORPRO_MSI_SOMATIC { +process MSISENSORPRO_MSISOMATIC { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::msisensor-pro=1.2.0" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/msisensor-pro:1.2.0--hfc31af2_0' : - 'quay.io/biocontainers/msisensor-pro:1.2.0--hfc31af2_0' }" + 'https://depot.galaxyproject.org/singularity/msisensor-pro%3A1.3.0--hfef96ef_0': + 'biocontainers/msisensor-pro:1.3.0--hfef96ef_0' }" input: tuple val(meta), path(normal), path(normal_index), path(tumor), path(tumor_index), path(intervals) - path (fasta) - path (msisensor_scan) + tuple val(meta2), path(fasta) + path(msisensor_scan) output: tuple val(meta), path("${prefix}") , emit: output_report tuple val(meta), path("${prefix}_dis") , emit: output_dis - tuple val(meta), path("${prefix}_germline"), emit: output_germline - tuple val(meta), path("${prefix}_somatic") , emit: output_somatic + tuple val(meta), path("${prefix}_germline"), emit: output_germline, optional: true + tuple val(meta), path("${prefix}_somatic") , emit: output_somatic, optional: true path "versions.yml" , emit: versions when: @@ -25,19 +25,35 @@ process MSISENSORPRO_MSI_SOMATIC { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def fasta = fasta ? "-g ${fasta}" : "" - def intervals = intervals ? " -e ${intervals} " : "" + def fasta_cmd = fasta ? "-g ${fasta}" : "" + def intervals_cmd = intervals ? 
" -e ${intervals} " : "" + """ msisensor-pro \\ msi \\ -d ${msisensor_scan} \\ -n ${normal} \\ -t ${tumor} \\ - ${fasta} \\ - -o $prefix \\ + ${fasta_cmd} \\ + -o ${prefix} \\ -b ${task.cpus} \\ - ${intervals} \\ - $args + ${intervals_cmd} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + msisensor-pro: \$(msisensor-pro 2>&1 | sed -nE 's/Version:\\sv([0-9]\\.[0-9])/\\1/ p') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix} + touch ${prefix}_dis + touch ${prefix}_germline + touch ${prefix}_somatic cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/msisensorpro/msisomatic/meta.yml b/modules/nf-core/msisensorpro/msisomatic/meta.yml new file mode 100644 index 0000000000..1c05e25e3c --- /dev/null +++ b/modules/nf-core/msisensorpro/msisomatic/meta.yml @@ -0,0 +1,118 @@ +name: msisensorpro_msisomatic +description: MSIsensor-pro evaluates Microsatellite Instability (MSI) for cancer patients + with next generation sequencing data. It accepts the whole genome sequencing, whole + exome sequencing and target region (panel) sequencing data as input +keywords: + - micro-satellite-scan + - msisensor-pro + - msi + - somatic +tools: + - msisensorpro: + description: Microsatellite Instability (MSI) detection using high-throughput + sequencing data. + homepage: https://github.com/xjtu-omics/msisensor-pro + documentation: https://github.com/xjtu-omics/msisensor-pro/wiki + tool_dev_url: https://github.com/xjtu-omics/msisensor-pro + doi: "10.1016/j.gpb.2020.02.001" + licence: ["Custom Licence"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - normal: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - normal_index: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + - tumor: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - tumor_index: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + - intervals: + type: file + description: bed file containing interval information, optional + pattern: "*.{bed}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing genome information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome + pattern: "*.{fasta}" + ontologies: [] + - msisensor_scan: + type: file + description: Output from msisensor-pro/scan, containing list of msi regions + pattern: "*.list" + ontologies: [] +output: + output_report: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}: + type: file + description: File containing final report with all detected microsatellites, + unstable somatic microsatellites, msi score + ontologies: [] + output_dis: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}_dis: + type: file + description: File containing distribution results + ontologies: [] + output_germline: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}_germline: + type: file + description: File containing germline results + ontologies: [] + output_somatic: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}_somatic: + type: file + description: File containing somatic results + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/msisensorpro/scan/environment.yml b/modules/nf-core/msisensorpro/scan/environment.yml new file mode 100644 index 0000000000..3b66afc67e --- /dev/null +++ b/modules/nf-core/msisensorpro/scan/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::msisensor-pro=1.3.0 diff --git a/modules/nf-core/modules/msisensorpro/scan/main.nf b/modules/nf-core/msisensorpro/scan/main.nf similarity index 62% rename from modules/nf-core/modules/msisensorpro/scan/main.nf rename to modules/nf-core/msisensorpro/scan/main.nf index 752606d620..7ef4ae172c 100644 --- a/modules/nf-core/modules/msisensorpro/scan/main.nf +++ b/modules/nf-core/msisensorpro/scan/main.nf @@ -2,10 +2,10 @@ process MSISENSORPRO_SCAN { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::msisensor-pro=1.2.0" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/msisensor-pro:1.2.0--hfc31af2_0' : - 'quay.io/biocontainers/msisensor-pro:1.2.0--hfc31af2_0' }" + 'https://depot.galaxyproject.org/singularity/msisensor-pro%3A1.3.0--hfef96ef_0': + 'biocontainers/msisensor-pro:1.3.0--hfef96ef_0' }" input: tuple val(meta), path(fasta) @@ -32,4 +32,15 @@ process MSISENSORPRO_SCAN { msisensor-pro: \$(msisensor-pro 2>&1 | sed -nE 's/Version:\\sv([0-9]\\.[0-9])/\\1/ p') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.msisensor_scan.list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + msisensor-pro: \$(msisensor-pro 2>&1 | sed -nE 's/Version:\\sv([0-9]\\.[0-9])/\\1/ p') + END_VERSIONS + """ } diff --git a/modules/nf-core/msisensorpro/scan/meta.yml b/modules/nf-core/msisensorpro/scan/meta.yml new file mode 100644 index 0000000000..2380922c21 --- /dev/null +++ b/modules/nf-core/msisensorpro/scan/meta.yml @@ -0,0 +1,52 @@ +name: msisensorpro_scan +description: MSIsensor-pro evaluates Microsatellite Instability (MSI) for cancer patients + with next generation sequencing data. It accepts the whole genome sequencing, whole + exome sequencing and target region (panel) sequencing data as input +keywords: + - micro-satellite-scan + - msisensor-pro + - scan +tools: + - msisensorpro: + description: Microsatellite Instability (MSI) detection using high-throughput + sequencing data. + homepage: https://github.com/xjtu-omics/msisensor-pro + documentation: https://github.com/xjtu-omics/msisensor-pro/wiki + tool_dev_url: https://github.com/xjtu-omics/msisensor-pro + doi: "10.1016/j.gpb.2020.02.001" + licence: ["Custom Licence"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fasta: + type: file + description: Reference genome + pattern: "*.{fasta}" + ontologies: [] +output: + list: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.list": + type: file + description: File containing microsatellite list + pattern: "*.{list}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 0000000000..009874d4c0 --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::multiqc=1.33 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf new file mode 100644 index 0000000000..3b0e975be6 --- /dev/null +++ b/modules/nf-core/multiqc/main.nf @@ -0,0 +1,55 @@ +process MULTIQC { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/34/34e733a9ae16a27e80fe00f863ea1479c96416017f24a907996126283e7ecd4d/data' : + 'community.wave.seqera.io/library/multiqc:1.33--ee7739d47738383b' }" + + input: + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + path(replace_names) + path(sample_names) + + output: + path "*.html" , emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + tuple val("${task.process}"), val('multiqc'), eval('multiqc --version | sed "s/.* //g"'), emit: versions + // MultiQC should not push its versions to the `versions` topic. Its input depends on the versions topic to be resolved thus outputting to the topic will let the pipeline hang forever + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' + def config = multiqc_config ? "--config ${multiqc_config}" : '' + def extra_config = extra_multiqc_config ? "--config ${extra_multiqc_config}" : '' + def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' + def replace = replace_names ? "--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' + """ + multiqc \\ + --force \\ + ${args} \\ + ${config} \\ + ${prefix} \\ + ${extra_config} \\ + ${logo} \\ + ${replace} \\ + ${samples} \\ + . 
+ """ + + stub: + """ + mkdir multiqc_data + touch multiqc_data/.stub + mkdir multiqc_plots + touch multiqc_report.html + """ +} diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml new file mode 100644 index 0000000000..e4b8f94ddc --- /dev/null +++ b/modules/nf-core/multiqc/meta.yml @@ -0,0 +1,96 @@ +name: multiqc +description: Aggregate results from bioinformatics analyses across many samples into + a single report +keywords: + - QC + - bioinformatics tools + - Beautiful stand-alone HTML report +tools: + - multiqc: + description: | + MultiQC searches a given directory for analysis logs and compiles a HTML report. + It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. + homepage: https://multiqc.info/ + documentation: https://multiqc.info/docs/ + licence: ["GPL-3.0-or-later"] + identifier: biotools:multiqc +input: + - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + ontologies: [] + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + ontologies: [] + - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. 
+ pattern: "*.{tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample-names + argument. + pattern: "*.{tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV +output: + report: + - "*.html": + type: file + description: MultiQC report file + pattern: "*.html" + ontologies: [] + data: + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" + plots: + - "*_plots": + type: file + description: Plots created by MultiQC + pattern: "*_plots" + ontologies: [] + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - multiqc: + type: string + description: The tool name + - multiqc --version | sed "s/.* //g": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/muse/call/environment.yml b/modules/nf-core/muse/call/environment.yml new file mode 100644 index 0000000000..f81029a392 --- /dev/null +++ b/modules/nf-core/muse/call/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::muse=2.1.2 diff --git a/modules/nf-core/muse/call/main.nf b/modules/nf-core/muse/call/main.nf new file mode 100644 index 0000000000..0f8ceb3c9f --- /dev/null +++ b/modules/nf-core/muse/call/main.nf @@ -0,0 +1,41 @@ +process MUSE_CALL { + tag "${meta.id}" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9f/9f0ebb574ef5eed2a6e034f1b2feea6c252d1ab0c8bc5135a669059aa1f4d2ca/data' + : 'community.wave.seqera.io/library/muse:6637291dcbb0bdb8'}" + + input: + tuple val(meta), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai), path(reference) + + output: + tuple val(meta), path("*.MuSE.txt"), emit: txt + tuple val("${task.process}"), val('muse'), eval("MuSE --version | sed -e 's/MuSE, version //g' | sed -e 's/MuSE v//g'"), topic: versions, emit: versions_muse + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + MuSE \\ + call \\ + ${args} \\ + -f ${reference} \\ + -O ${prefix} \\ + -n ${task.cpus} \\ + ${tumor_bam} \\ + ${normal_bam} + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo ${args} + touch ${prefix}.MuSE.txt + """ +} diff --git a/modules/nf-core/muse/call/meta.yml b/modules/nf-core/muse/call/meta.yml new file mode 100644 index 0000000000..a3c8d61cc1 --- /dev/null +++ b/modules/nf-core/muse/call/meta.yml @@ -0,0 +1,89 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "muse_call" +description: pre-filtering and calculating position-specific summary statistics using + the Markov substitution model +keywords: + - variant calling + - somatic + - wgs + - wxs + - vcf +tools: + - "MuSE": + description: "Somatic point mutation caller based on Markov substitution model + for molecular evolution" + homepage: "https://bioinformatics.mdanderson.org/public-software/muse/" + documentation: "https://github.com/wwylab/MuSE" + tool_dev_url: "https://github.com/wwylab/MuSE" + doi: "10.1101/gr.278456.123" + licence: ["https://github.com/danielfan/MuSE/blob/master/LICENSE"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample 
information + e.g. `[ id:'sample1' ]` + - tumor_bam: + type: file + description: Sorted tumor BAM file + pattern: "*.bam" + ontologies: [] + - tumor_bai: + type: file + description: Index file for the tumor BAM file + pattern: "*.bai" + ontologies: [] + - normal_bam: + type: file + description: Sorted matched normal BAM file + pattern: "*.bam" + ontologies: [] + - normal_bai: + type: file + description: Index file for the normal BAM file + pattern: "*.bai" + ontologies: [] + - reference: + type: file + description: reference genome file + pattern: ".fasta" + ontologies: [] +output: + txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.MuSE.txt": + type: file + description: position-specific summary statistics + pattern: "*.MuSE.txt" + ontologies: [] + versions_muse: + - - ${task.process}: + type: string + description: The process the versions were collected from + - muse: + type: string + description: The tool name + - "MuSE --version | sed -e 's/MuSE, version //g' | sed -e 's/MuSE v//g'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - muse: + type: string + description: The tool name + - "MuSE --version | sed -e 's/MuSE, version //g' | sed -e 's/MuSE v//g'": + type: string + description: The command used to generate the version of the tool +authors: + - "@famosab" +maintainers: + - "@famosab" diff --git a/modules/nf-core/muse/sump/environment.yml b/modules/nf-core/muse/sump/environment.yml new file mode 100644 index 0000000000..7a72409583 --- /dev/null +++ b/modules/nf-core/muse/sump/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::muse=2.1.2 + - 
bioconda::htslib=1.22.1 diff --git a/modules/nf-core/muse/sump/main.nf b/modules/nf-core/muse/sump/main.nf new file mode 100644 index 0000000000..3758ad38a7 --- /dev/null +++ b/modules/nf-core/muse/sump/main.nf @@ -0,0 +1,54 @@ +process MUSE_SUMP { + tag "${meta.id}" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/83/83d1d3caa1b6ce54ce999e0061d7fe8acbe6788d5c7970574eff330ea819fb85/data' + : 'community.wave.seqera.io/library/htslib_muse:9a4b9cb78c211f1e'}" + + input: + tuple val(meta), path(muse_call_txt), path(ref_vcf), path(ref_vcf_tbi) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.vcf.gz.tbi"), emit: tbi + tuple val("${task.process}"), val('muse'), eval("MuSE --version | sed -e 's/MuSE, version //g' | sed -e 's/MuSE v//g'"), topic: versions, emit: versions_muse + tuple val("${task.process}"), val('bgzip'), eval("bgzip --version | sed -n 's/bgzip (htslib) \\([0-9.]*\\)/\\1/p'"), topic: versions, emit: versions_bgzip + + when: + task.ext.when == null || task.ext.when + + script: + // -G for WGS data and -E for WES data + def args = task.ext.args ?: '' + // args for bgzip + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // MuSE complains if the timestamp of the dbsnp VCF index is older than the timestamp of the VCF itself, so we need to touch it here + """ + touch ${ref_vcf_tbi} + + MuSE \\ + sump \\ + ${args} \\ + -I ${muse_call_txt} \\ + -n ${task.cpus} \\ + -D ${ref_vcf} \\ + -O ${prefix}.vcf + + bgzip ${args2} --threads ${task.cpus} ${prefix}.vcf + tabix -p vcf ${prefix}.vcf.gz + """ + + stub: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo ${args} + echo ${args2} + echo "" | gzip > ${prefix}.vcf.gz + touch 
${prefix}.vcf.gz.tbi + """ +} diff --git a/modules/nf-core/muse/sump/meta.yml b/modules/nf-core/muse/sump/meta.yml new file mode 100644 index 0000000000..4c26737620 --- /dev/null +++ b/modules/nf-core/muse/sump/meta.yml @@ -0,0 +1,110 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "muse_sump" +description: Computes tier-based cutoffs from a sample-specific error model which + is generated by muse/call and reports the finalized variants +keywords: + - variant calling + - somatic + - wgs + - wxs + - vcf +tools: + - "MuSE": + description: "Somatic point mutation caller based on Markov substitution model + for molecular evolution" + homepage: "https://bioinformatics.mdanderson.org/public-software/muse/" + documentation: "https://github.com/wwylab/MuSE" + tool_dev_url: "https://github.com/wwylab/MuSE" + doi: "10.1101/gr.278456.123" + licence: ["https://github.com/danielfan/MuSE/blob/master/LICENSE"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - muse_call_txt: + type: file + description: single input file generated by 'MuSE call' + pattern: "*.MuSE.txt" + ontologies: [] + - ref_vcf: + type: file + description: | + dbSNP vcf file that should be bgzip compressed, tabix indexed and + based on the same reference genome used in 'MuSE call' + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - ref_vcf_tbi: + type: file + description: Tabix index for the dbSNP vcf file + pattern: "*.vcf.gz.tbi" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.vcf.gz": + type: file + description: bgzipped vcf file with called variants + pattern: "*.vcf.gz" + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "*.vcf.gz.tbi": + type: file + description: tabix index of bgzipped vcf file with called variants + pattern: "*.vcf.gz.tbi" + versions_muse: + - - ${task.process}: + type: string + description: The process the versions were collected from + - muse: + type: string + description: The tool name + - "MuSE --version | sed -e 's/MuSE, version //g' | sed -e 's/MuSE v//g'": + type: string + description: The command used to generate the version of the tool + versions_bgzip: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bgzip: + type: string + description: The tool name + - "bgzip --version | sed -n 's/bgzip (htslib) \\([0-9.]*\\)/\\1/p'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - muse: + type: string + description: The tool name + - "MuSE --version | sed -e 's/MuSE, version //g' | sed -e 's/MuSE v//g'": + type: string + description: The command used to generate the version of the tool + - - ${task.process}: + type: string + description: The process the versions were collected from + - bgzip: + type: string + description: The tool name + - "bgzip --version | sed -n 's/bgzip (htslib) \\([0-9.]*\\)/\\1/p'": + type: string + description: The command used to generate the version of the tool +authors: + - "@famosab" +maintainers: + - "@famosab" diff --git a/modules/nf-core/ngscheckmate/ncm/environment.yml b/modules/nf-core/ngscheckmate/ncm/environment.yml new file mode 100644 index 0000000000..7348216563 --- /dev/null +++ b/modules/nf-core/ngscheckmate/ncm/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::ngscheckmate=1.0.1 + - bioconda::bcftools=1.21 diff --git 
a/modules/nf-core/ngscheckmate/ncm/main.nf b/modules/nf-core/ngscheckmate/ncm/main.nf new file mode 100644 index 0000000000..ffb64a86b5 --- /dev/null +++ b/modules/nf-core/ngscheckmate/ncm/main.nf @@ -0,0 +1,65 @@ +process NGSCHECKMATE_NCM { + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ngscheckmate:1.0.1--py312pl5321h577a1d6_4': + 'biocontainers/ngscheckmate:1.0.1--py312pl5321h577a1d6_4' }" + + input: + tuple val(meta) , path(files) + tuple val(meta2), path(snp_bed) + tuple val(meta3), path(fasta) + + output: + tuple val(meta), path("*_corr_matrix.txt"), emit: corr_matrix + tuple val(meta), path("*_matched.txt") , emit: matched + tuple val(meta), path("*_all.txt") , emit: all + tuple val(meta), path("*.pdf") , emit: pdf, optional: true + tuple val(meta), path("*.vcf") , emit: vcf, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "$meta.id" + def unzip = files.any { it.toString().endsWith(".vcf.gz") } + """ + if $unzip + then + for VCFGZ in *.vcf.gz; do + gunzip -cdf \$VCFGZ > \$( basename \$VCFGZ .gz ); + done + fi + + NCM_REF="./"${fasta} ncm.py -d . -bed ${snp_bed} -O . 
-N ${prefix} $args + + if $unzip + then + rm -f *.vcf # clean up decompressed vcfs + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ngscheckmate: \$(ncm.py --help | sed "7!d;s/ *Ensuring Sample Identity v//g") + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "$meta.id" + """ + touch ${prefix}_output_corr_matrix.txt + touch ${prefix}_matched.txt + touch ${prefix}_all.txt + touch ${prefix}.pdf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ngscheckmate: \$(ncm.py --help | sed "7!d;s/ *Ensuring Sample Identity v//g") + END_VERSIONS + """ + +} diff --git a/modules/nf-core/ngscheckmate/ncm/meta.yml b/modules/nf-core/ngscheckmate/ncm/meta.yml new file mode 100644 index 0000000000..06c131d6b0 --- /dev/null +++ b/modules/nf-core/ngscheckmate/ncm/meta.yml @@ -0,0 +1,106 @@ +name: ngscheckmate_ncm +description: Determining whether sequencing data comes from the same individual by + using SNP matching. Designed for humans on vcf or bam files. +keywords: + - ngscheckmate + - matching + - snp +tools: + - ngscheckmate: + description: NGSCheckMate is a software package for identifying next generation + sequencing (NGS) data files from the same individual, including matching between + DNA and RNA. + homepage: https://github.com/parklab/NGSCheckMate + documentation: https://github.com/parklab/NGSCheckMate + tool_dev_url: https://github.com/parklab/NGSCheckMate + doi: "10.1093/nar/gkx193" + licence: ["MIT"] + identifier: biotools:ngscheckmate +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - files: + type: file + description: VCF or BAM files for each sample, in a merged channel (possibly + gzipped). BAM files require an index too. + pattern: "*.{vcf,vcf.gz,bam,bai}" + - - meta2: + type: map + description: | + Groovy Map containing SNP information + e.g. 
[ id:'test' ] + - snp_bed: + type: file + description: BED file containing the SNPs to analyse + pattern: "*.{bed}" + - - meta3: + type: map + description: | + Groovy Map containing reference fasta information + e.g. [ id:'test' ] + - fasta: + type: file + description: fasta file for the genome, only used in the bam mode + pattern: "*.{fasta,fa}" +output: + - corr_matrix: + - meta: + type: file + description: A text file containing the correlation matrix between each sample + pattern: "*corr_matrix.txt" + - "*_corr_matrix.txt": + type: file + description: A text file containing the correlation matrix between each sample + pattern: "*corr_matrix.txt" + - matched: + - meta: + type: file + description: A txt file containing only the samples that match with each other + pattern: "*matched.txt" + - "*_matched.txt": + type: file + description: A txt file containing only the samples that match with each other + pattern: "*matched.txt" + - all: + - meta: + type: file + description: A txt file containing all the sample comparisons, whether they + match or not + pattern: "*all.txt" + - "*_all.txt": + type: file + description: A txt file containing all the sample comparisons, whether they + match or not + pattern: "*all.txt" + - pdf: + - meta: + type: file + description: A pdf containing a dendrogram showing how the samples match up + pattern: "*.{pdf}" + - "*.pdf": + type: file + description: A pdf containing a dendrogram showing how the samples match up + pattern: "*.{pdf}" + - vcf: + - meta: + type: file + description: If ran in bam mode, vcf files for each sample giving the SNP calls + used + pattern: "*.vcf" + - "*.vcf": + type: file + description: If ran in bam mode, vcf files for each sample giving the SNP calls + used + pattern: "*.vcf" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@sppearce" +maintainers: + - "@sppearce" diff --git a/modules/nf-core/parabricks/fq2bam/main.nf 
b/modules/nf-core/parabricks/fq2bam/main.nf new file mode 100644 index 0000000000..5f78b4dbde --- /dev/null +++ b/modules/nf-core/parabricks/fq2bam/main.nf @@ -0,0 +1,107 @@ +process PARABRICKS_FQ2BAM { + tag "${meta.id}" + label 'process_high' + label 'process_gpu' + // needed by the module to run on a cluster because we need to copy the fasta reference, see https://github.com/nf-core/modules/issues/9230 + stageInMode 'copy' + + container "nvcr.io/nvidia/clara/clara-parabricks:4.6.0-1" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(fasta) + tuple val(meta3), path(index) + tuple val(meta4), path(intervals) + tuple val(meta5), path(known_sites) + val output_fmt + + output: + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.bai"), emit: bai, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.table"), emit: bqsr_table, optional: true + tuple val(meta), path("*_qc_metrics"), emit: qc_metrics, optional: true + tuple val(meta), path("*.duplicate-metrics.txt"), emit: duplicate_metrics, optional: true + path "compatible_versions.yml", emit: compatible_versions, optional: true + tuple val("${task.process}"), val('parabricks'), eval("pbrun version | grep -m1 '^pbrun:' | sed 's/^pbrun:[[:space:]]*//'"), topic: versions, emit: versions_parabricks + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Parabricks module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def in_fq_command = meta.single_end ? "--in-se-fq ${reads}" : "--in-fq ${reads}" + def extension = "${output_fmt}" + + def known_sites_command = known_sites ? 
(known_sites instanceof List ? known_sites.collect { knownSite -> "--knownSites ${knownSite}" }.join(' ') : "--knownSites ${known_sites}") : "" + def known_sites_output_cmd = known_sites ? "--out-recal-file ${prefix}.table" : "" + def intervals_command = intervals ? (intervals instanceof List ? intervals.collect { interval -> "--interval-file ${interval}" }.join(' ') : "--interval-file ${intervals}") : "" + + def num_gpus = task.accelerator ? "--num-gpus ${task.accelerator.request}" : '' + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + cp ${fasta} \$INDEX + + pbrun \\ + fq2bam \\ + --ref \$INDEX \\ + ${in_fq_command} \\ + --out-bam ${prefix}.${extension} \\ + ${known_sites_command} \\ + ${known_sites_output_cmd} \\ + ${intervals_command} \\ + ${num_gpus} \\ + --bwa-cpu-thread-pool ${task.cpus} \\ + --monitor-usage \\ + ${args} + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Parabricks module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = "${output_fmt}" + def extension_index = "${output_fmt}" == "cram" ? "crai" : "bai" + def known_sites_output = known_sites ? "touch ${prefix}.table" : "" + def qc_metrics_output = args.contains("--out-qc-metrics-dir") ? "mkdir ${prefix}_qc_metrics" : "" + def duplicate_metrics_output = args.contains("--out-duplicate-metrics") ? 
"touch ${prefix}.duplicate-metrics.txt" : "" + """ + touch ${prefix}.${extension} + touch ${prefix}.${extension}.${extension_index} + ${known_sites_output} + ${qc_metrics_output} + ${duplicate_metrics_output} + + # Capture once and build single-line compatible_with (spaces only, no tabs) + pbrun_version_output=\$(pbrun fq2bam --version 2>&1) + + # Because of a space between BWA and mem in the version output this is handled different to the other modules + compat_line=\$(echo "\$pbrun_version_output" | awk -F':' ' + /Compatible With:/ {on=1; next} + /^---/ {on=0} + on && /:/ { + key=\$1; val=\$2 + gsub(/[ \\t]+/, " ", key); gsub(/^[ \\t]+|[ \\t]+\$/, "", key) + gsub(/[ \\t]+/, " ", val); gsub(/^[ \\t]+|[ \\t]+\$/, "", val) + a[++i]=key ": " val + } + END { for (j=1;j<=i;j++) printf "%s%s", (j>1?", ":""), a[j] } + ') + + cat <<EOF > compatible_versions.yml + "${task.process}": + pbrun_version: \$(echo "\$pbrun_version_output" | awk '/^pbrun:/ {print \$2; exit}') + compatible_with: "\$compat_line" + EOF + """ +} diff --git a/modules/nf-core/parabricks/fq2bam/meta.yml b/modules/nf-core/parabricks/fq2bam/meta.yml new file mode 100644 index 0000000000..a84dc0bf48 --- /dev/null +++ b/modules/nf-core/parabricks/fq2bam/meta.yml @@ -0,0 +1,187 @@ +name: "parabricks_fq2bam" +description: NVIDIA Clara Parabricks GPU-accelerated alignment, sorting, BQSR + calculation, and duplicate marking. Note this nf-core module requires files to + be copied into the working directory and not symlinked. +keywords: + - align + - sort + - bqsr + - duplicates +tools: + - "parabricks": + description: "NVIDIA Clara Parabricks GPU-accelerated genomics tools" + homepage: "https://www.nvidia.com/en-us/clara/genomics/" + documentation: "https://docs.nvidia.com/clara/parabricks/latest/index.html" + licence: ["custom"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: fastq.gz files + pattern: "*.fastq.gz" + ontologies: + - edam: "http://edamontology.org/format_1930" # FASTQ + - edam: "http://edamontology.org/format_3989" # GZIP format + - - meta2: + type: map + description: | + Groovy Map containing fasta information + - fasta: + type: file + description: reference fasta file - must be unzipped + pattern: "*.fasta" + ontologies: + - edam: "http://edamontology.org/format_1929" # FASTA + - - meta3: + type: map + description: | + Groovy Map containing index information + - index: + type: file + description: reference BWA index + pattern: "*.{amb,ann,bwt,pac,sa}" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing index information + - intervals: + type: file + description: (optional) file(s) containing genomic intervals for use in + base quality score recalibration (BQSR) + pattern: "*.{bed,interval_list,picard,list,intervals}" + ontologies: [] + - - meta5: + type: map + description: | + Groovy Map containing known sites information + - known_sites: + type: file + description: (optional) known sites file(s) for calculating BQSR. markdups + must be true to perform BQSR. + pattern: "*.vcf.gz" + ontologies: + - edam: "http://edamontology.org/format_3016" # VCF + - edam: "http://edamontology.org/format_3989" # GZIP + - output_fmt: + type: string + description: Output format for the alignment. Options are 'bam' or 'cram' + pattern: "{bam,cram}" +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Sorted BAM file + pattern: "*.bam" + ontologies: [] + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.bai": + type: file + description: index corresponding to sorted BAM file + pattern: "*.bai" + ontologies: [] + cram: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Sorted CRAM file + pattern: "*.cram" + ontologies: [] + crai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: index corresponding to sorted CRAM file + pattern: "*.crai" + ontologies: [] + bqsr_table: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.table": + type: file + description: (optional) table from base quality score recalibration calculation, to be used with parabricks/applybqsr + pattern: "*.table" + ontologies: [] + qc_metrics: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_qc_metrics": + type: directory + description: (optional) optional directory of qc metrics + pattern: "*_qc_metrics" + duplicate_metrics: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.duplicate-metrics.txt": + type: file + description: (optional) metrics calculated from marking duplicates in the bam file + pattern: "*.duplicate-metrics.txt" + ontologies: [] + compatible_versions: + - compatible_versions.yml: + type: file + description: File containing info on compatible CPU-based software versions. 
+ pattern: "compatible_versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + versions_parabricks: + - - ${task.process}: + type: string + description: The process the versions were collected from + - parabricks: + type: string + description: The tool name + - "pbrun version | grep -m1 '^pbrun:' | sed 's/^pbrun:[[:space:]]*//'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - parabricks: + type: string + description: The tool name + - "pbrun version | grep -m1 '^pbrun:' | sed 's/^pbrun:[[:space:]]*//'": + type: string + description: The command used to generate the version of the tool +authors: + - "@bsiranosian" + - "@adamrtalbot" +maintainers: + - "@bsiranosian" + - "@adamrtalbot" + - "@gallvp" + - "@famosab" diff --git a/modules/nf-core/rbt/vcfsplit/environment.yml b/modules/nf-core/rbt/vcfsplit/environment.yml new file mode 100644 index 0000000000..58b4e8e0c7 --- /dev/null +++ b/modules/nf-core/rbt/vcfsplit/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::rust-bio-tools=0.42.2" diff --git a/modules/nf-core/rbt/vcfsplit/main.nf b/modules/nf-core/rbt/vcfsplit/main.nf new file mode 100644 index 0000000000..24e85cd5cb --- /dev/null +++ b/modules/nf-core/rbt/vcfsplit/main.nf @@ -0,0 +1,38 @@ +process RBT_VCFSPLIT { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/rust-bio-tools:0.42.2--h4458251_1' + : 'biocontainers/rust-bio-tools:0.42.2--h4458251_1'}" + + input: + tuple val(meta), path(vcf) + val numchunks + + output: + tuple val(meta), path("*.bcf"), emit: bcfchunks + tuple val("${task.process}"), val('rbt'), eval("rbt --version | grep -oE '[0-9]+(\\.[0-9]+)+' | head -n 1"), topic: versions, emit: versions_rbt + + when: + task.ext.when == null || task.ext.when + + script: + // No args because tool does not accept args, only stdin/stdout + def prefix = task.ext.prefix ?: "${meta.id}" + def chunks = numchunks ? (numchunks - 1) : 15 + """ + rbt vcf-split \\ + ${vcf} \\ + ${prefix}.{0..${chunks}}.bcf + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def chunks = numchunks ? (numchunks - 1) : 15 + def bcf_files = (0..chunks).collect { items -> "${prefix}.${items}.bcf" }.join(' ') + """ + touch ${bcf_files} + """ +} diff --git a/modules/nf-core/rbt/vcfsplit/meta.yml b/modules/nf-core/rbt/vcfsplit/meta.yml new file mode 100644 index 0000000000..d0d59b8b94 --- /dev/null +++ b/modules/nf-core/rbt/vcfsplit/meta.yml @@ -0,0 +1,75 @@ +name: "rbt_vcfsplit" +description: A tool for splitting VCF/BCF files into N equal chunks, including BND support +keywords: + - genomics + - splitting + - VCF + - BCF + - variants +tools: + - rust-bio-tools: + description: "A growing collection of fast and secure command line utilities for dealing with NGS data implemented on top of Rust-Bio." + homepage: "https://github.com/rust-bio/rust-bio-tools" + documentation: "https://github.com/rust-bio/rust-bio-tools" + tool_dev_url: "https://github.com/rust-bio/rust-bio-tools" + doi: "no DOI available" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - vcf: + type: file + description: VCF file with variants to be split + pattern: "*.{vcf,bcf,vcf.gz}" + ontologies: + - edam: "http://edamontology.org/format_3016" # VCF + - edam: http://edamontology.org/format_3020 # BCF + - numchunks: + type: integer + description: Number of chunks to split the VCF file into. If unset, 16 chunks (indices 0 to 15) are produced. + +output: + bcfchunks: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.bcf": + type: file + description: Chunks of the input VCF file, split into `numchunks` equal parts. + pattern: "*.bcf" + ontologies: + - edam: http://edamontology.org/format_3020 # BCF + versions_rbt: + - - ${task.process}: + type: string + description: The process the versions were collected from + - rbt: + type: string + description: The tool name + - "rbt --version | grep -oE '[0-9]+(\\.[0-9]+)+' | head -n 1": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - rbt: + type: string + description: The tool name + - "rbt --version | grep -oE '[0-9]+(\\.[0-9]+)+' | head -n 1": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@famosab" +maintainers: + - "@famosab" diff --git a/modules/nf-core/samtools/bam2fq/environment.yml b/modules/nf-core/samtools/bam2fq/environment.yml new file mode 100644 index 0000000000..62054fc97a --- /dev/null +++ b/modules/nf-core/samtools/bam2fq/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/modules/samtools/bam2fq/main.nf b/modules/nf-core/samtools/bam2fq/main.nf similarity index 85% rename from 
modules/nf-core/modules/samtools/bam2fq/main.nf rename to modules/nf-core/samtools/bam2fq/main.nf index 32588dd7f8..1d3049e565 100644 --- a/modules/nf-core/modules/samtools/bam2fq/main.nf +++ b/modules/nf-core/samtools/bam2fq/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_BAM2FQ { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(inputbam) @@ -15,6 +15,9 @@ process SAMTOOLS_BAM2FQ { tuple val(meta), path("*.fq.gz"), emit: reads path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" @@ -42,7 +45,7 @@ process SAMTOOLS_BAM2FQ { bam2fq \\ $args \\ -@ $task.cpus \\ - $inputbam >${prefix}_interleaved.fq.gz + $inputbam | gzip --no-name > ${prefix}_interleaved.fq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/bam2fq/meta.yml b/modules/nf-core/samtools/bam2fq/meta.yml new file mode 100644 index 0000000000..b17ed608d0 --- /dev/null +++ b/modules/nf-core/samtools/bam2fq/meta.yml @@ -0,0 +1,54 @@ +name: samtools_bam2fq +description: | + The module uses bam2fq method from samtools to + convert a SAM, BAM or CRAM file to FASTQ format +keywords: + - bam2fq + - samtools + - fastq +tools: + - samtools: + description: Tools for dealing with SAM, BAM and CRAM files + documentation: http://www.htslib.org/doc/1.1/samtools.html + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample 
information + e.g. [ id:'test', single_end:false ] + - inputbam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - - split: + type: boolean + description: | + TRUE/FALSE value to indicate if reads should be separated into + /1, /2 and if present other, or singleton. + Note: choosing TRUE will generate 4 different files. + Choosing FALSE will produce a single file, which will be interleaved in case + the input contains paired reads. +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fq.gz": + type: file + description: | + FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton) + or a single interleaved .fq.gz file if the user chooses not to split the reads. + pattern: "*.fq.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@lescai" +maintainers: + - "@lescai" diff --git a/modules/nf-core/samtools/collatefastq/environment.yml b/modules/nf-core/samtools/collatefastq/environment.yml new file mode 100644 index 0000000000..62054fc97a --- /dev/null +++ b/modules/nf-core/samtools/collatefastq/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/collatefastq/main.nf b/modules/nf-core/samtools/collatefastq/main.nf new file mode 100644 index 0000000000..8b70ebd345 --- /dev/null +++ b/modules/nf-core/samtools/collatefastq/main.nf @@ -0,0 +1,76 @@ +process SAMTOOLS_COLLATEFASTQ { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input) + tuple val(meta2), path(fasta) + val(interleave) + + output: + tuple val(meta), path("*_{1,2}.fq.gz") , optional:true, emit: fastq + tuple val(meta), path("*_interleaved.fq") , optional:true, emit: fastq_interleaved + tuple val(meta), path("*_other.fq.gz") , emit: fastq_other + tuple val(meta), path("*_singleton.fq.gz") , optional:true, emit: fastq_singleton + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + def output = (interleave && ! meta.single_end) ? "> ${prefix}_interleaved.fq" : + meta.single_end ? "-1 ${prefix}_1.fq.gz -s ${prefix}_singleton.fq.gz" : + "-1 ${prefix}_1.fq.gz -2 ${prefix}_2.fq.gz -s ${prefix}_singleton.fq.gz" + + """ + samtools collate \\ + $args \\ + --threads $task.cpus \\ + ${reference} \\ + -O \\ + $input \\ + . | + + samtools fastq \\ + $args2 \\ + --threads $task.cpus \\ + ${reference} \\ + -0 ${prefix}_other.fq.gz \\ + $output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def empty = "echo '' | gzip " + def singletoncommand = "${empty}> ${prefix}_singleton.fq.gz" + def interleavecommand = interleave && !meta.single_end ? "touch ${prefix}_interleaved.fq" : "" + def output1command = !interleave ? "${empty}> ${prefix}_1.fq.gz" : "" + def output2command = !interleave && !meta.single_end ? 
"${empty}> ${prefix}_2.fq.gz" : "" + + """ + ${output1command} + ${output2command} + ${interleavecommand} + ${singletoncommand} + ${empty}> ${prefix}_other.fq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/collatefastq/meta.yml b/modules/nf-core/samtools/collatefastq/meta.yml new file mode 100644 index 0000000000..5bc912496e --- /dev/null +++ b/modules/nf-core/samtools/collatefastq/meta.yml @@ -0,0 +1,105 @@ +name: samtools_collatefastq +description: | + The module uses collate and then fastq methods from samtools to + convert a SAM, BAM or CRAM file to FASTQ format +keywords: + - bam2fq + - samtools + - fastq +tools: + - samtools: + description: Tools for dealing with SAM, BAM and CRAM files + documentation: http://www.htslib.org/doc/1.1/samtools.html + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" + - - interleave: + type: boolean + description: | + If true, the output is a single interleaved paired-end FASTQ + If false, the output split paired-end FASTQ + default: false +output: + - fastq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*_{1,2}.fq.gz" + - "*_{1,2}.fq.gz": + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + pattern: "*_{1,2}.fq.gz" + - fastq_interleaved: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*_interleaved.fq.gz" + - "*_interleaved.fq": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*_interleaved.fq.gz" + - fastq_other: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*_other.fq.gz" + - "*_other.fq.gz": + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*_other.fq.gz" + - fastq_singleton: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*_singleton.fq.gz" + - "*_singleton.fq.gz": + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + pattern: "*_singleton.fq.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@lescai" + - "@maxulysse" + - "@matthdsm" +maintainers: + - "@lescai" + - "@maxulysse" + - "@matthdsm" diff --git a/modules/nf-core/samtools/convert/environment.yml b/modules/nf-core/samtools/convert/environment.yml new file mode 100644 index 0000000000..62054fc97a --- /dev/null +++ b/modules/nf-core/samtools/convert/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/convert/main.nf b/modules/nf-core/samtools/convert/main.nf new file mode 100644 index 0000000000..9667e72d84 --- /dev/null +++ b/modules/nf-core/samtools/convert/main.nf @@ -0,0 +1,60 @@ +process SAMTOOLS_CONVERT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(index) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*.bam") , emit: bam , optional: true + tuple val(meta), path("*.cram") , emit: cram, optional: true + tuple val(meta), path("*.bai") , emit: bai , optional: true + tuple val(meta), path("*.crai") , emit: crai, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def output_extension = input.getExtension() == "bam" ? 
"cram" : "bam" + + """ + samtools view \\ + --threads ${task.cpus} \\ + --reference ${fasta} \\ + $args \\ + $input \\ + -o ${prefix}.${output_extension} + + samtools index -@${task.cpus} ${prefix}.${output_extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def output_extension = input.getExtension() == "bam" ? "cram" : "bam" + def index_extension = output_extension == "bam" ? "bai" : "crai" + + """ + touch ${prefix}.${output_extension} + touch ${prefix}.${output_extension}.${index_extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/convert/meta.yml b/modules/nf-core/samtools/convert/meta.yml new file mode 100644 index 0000000000..d5bfa161ba --- /dev/null +++ b/modules/nf-core/samtools/convert/meta.yml @@ -0,0 +1,103 @@ +name: samtools_convert +description: convert and then index CRAM -> BAM or BAM -> CRAM file +keywords: + - view + - index + - bam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram}" + - index: + type: file + description: BAM/CRAM index file + pattern: "*.{bai,crai}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Reference file to create the CRAM file + pattern: "*.{fasta,fa}" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fai: + type: file + description: Reference index file to create the CRAM file + pattern: "*.{fai}" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: filtered/converted BAM file + pattern: "*{.bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: filtered/converted CRAM file + pattern: "*{cram}" + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: filtered/converted BAM index + pattern: "*{.bai}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.crai": + type: file + description: filtered/converted CRAM index + pattern: "*{.crai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@FriederikeHanssen" + - "@maxulysse" + - "@matthdsm" diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml new file mode 100644 index 0000000000..62054fc97a --- /dev/null +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf new file mode 100644 index 0000000000..6de0095d86 --- /dev/null +++ b/modules/nf-core/samtools/faidx/main.nf @@ -0,0 +1,61 @@ +process SAMTOOLS_FAIDX { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(fai) + val get_sizes + + output: + tuple val(meta), path ("*.{fa,fasta}") , emit: fa, optional: true + tuple val(meta), path ("*.sizes") , emit: sizes, optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def get_sizes_command = get_sizes ? 
"cut -f 1,2 ${fasta}.fai > ${fasta}.sizes" : '' + """ + samtools \\ + faidx \\ + $fasta \\ + $args + + ${get_sizes_command} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? "touch ${match[0][1]}" : '' + def get_sizes_command = get_sizes ? "touch ${fasta}.sizes" : '' + """ + ${fastacmd} + touch ${fasta}.fai + if [[ "${fasta.extension}" == "gz" ]]; then + touch ${fasta}.gzi + fi + + ${get_sizes_command} + + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml new file mode 100644 index 0000000000..b7a2e0c1a3 --- /dev/null +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -0,0 +1,102 @@ +name: samtools_faidx +description: Index FASTA file, and optionally generate a file of chromosome sizes +keywords: + - index + - fasta + - faidx + - chromosome +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + ontologies: [] + - get_sizes: + type: boolean + description: use cut to get the sizes of the index (true) or not (false) + +output: + fa: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{fa,fasta}": + type: file + description: FASTA file + pattern: "*.{fa}" + ontologies: [] + sizes: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.sizes": + type: file + description: File containing chromosome lengths + pattern: "*.{sizes}" + ontologies: [] + fai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fai": + type: file + description: FASTA index file + pattern: "*.{fai}" + ontologies: [] + gzi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.gzi": + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@drpatelh" + - "@ewels" + - "@phue" +maintainers: + - "@maxulysse" + - "@phue" diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 0000000000..62054fc97a --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/modules/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf similarity index 64% rename from modules/nf-core/modules/samtools/index/main.nf rename to modules/nf-core/samtools/index/main.nf index dfe0234f7c..311756102d 100644 --- a/modules/nf-core/modules/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(input) @@ -33,4 +33,17 @@ process SAMTOOLS_INDEX { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? "csi" : "bai" + """ + touch ${input}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 0000000000..db8df0d505 --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,71 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: input file +output: + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI index file + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/merge/environment.yml b/modules/nf-core/samtools/merge/environment.yml new file mode 100644 index 0000000000..62054fc97a --- /dev/null +++ b/modules/nf-core/samtools/merge/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf new file mode 100644 index 0000000000..34da4c7c87 --- /dev/null +++ b/modules/nf-core/samtools/merge/main.nf @@ -0,0 +1,61 @@ +process SAMTOOLS_MERGE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input_files, stageAs: "?/*") + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam + tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai") , optional:true, emit: crai + path "versions.yml" , emit: versions + + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + def reference = fasta ? "--reference ${fasta}" : "" + """ + samtools \\ + merge \\ + --threads ${task.cpus-1} \\ + $args \\ + ${reference} \\ + ${prefix}.${file_type} \\ + $input_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + def index_type = file_type == "bam" ? "csi" : "crai" + def index = args.contains("--write-index") ? 
"touch ${prefix}.${index_type}" : "" + """ + touch ${prefix}.${file_type} + ${index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml new file mode 100644 index 0000000000..235aa21945 --- /dev/null +++ b/modules/nf-core/samtools/merge/meta.yml @@ -0,0 +1,104 @@ +name: samtools_merge +description: Merge BAM or CRAM file +keywords: + - merge + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_files: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram,sam}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of the reference file the CRAM was created with (optional) + pattern: "*.fai" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: BAM file + pattern: "*.{bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: CRAM file + pattern: "*.{cram}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: BAM index file (optional) + pattern: "*.csi" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: CRAM index file (optional) + pattern: "*.crai" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@yuukiiwa " + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@yuukiiwa " + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/mpileup/environment.yml b/modules/nf-core/samtools/mpileup/environment.yml new file mode 100644 index 0000000000..62054fc97a --- /dev/null +++ b/modules/nf-core/samtools/mpileup/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/mpileup/main.nf b/modules/nf-core/samtools/mpileup/main.nf new file mode 100644 index 0000000000..8693aa0477 --- /dev/null +++ b/modules/nf-core/samtools/mpileup/main.nf @@ -0,0 +1,51 @@ +process SAMTOOLS_MPILEUP { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(intervals) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.mpileup.gz"), emit: mpileup + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def fasta_cmd = fasta ? "--fasta-ref $fasta" : "" + def intervals_cmd = intervals ? "-l ${intervals}" : "" + """ + samtools mpileup \\ + $fasta_cmd \\ + --output ${prefix}.mpileup \\ + $args \\ + $intervals_cmd \\ + $input + bgzip ${prefix}.mpileup + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo | gzip > ${prefix}.mpileup.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/mpileup/meta.yml b/modules/nf-core/samtools/mpileup/meta.yml new file mode 100644 index 0000000000..6195138ef0 --- /dev/null +++ b/modules/nf-core/samtools/mpileup/meta.yml @@ -0,0 +1,63 @@ +name: samtools_mpileup +description: BAM +keywords: + - mpileup + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. 
+ homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - intervals: + type: file + description: Interval FILE + pattern: "*.bed" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" +output: + - mpileup: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.mpileup.gz": + type: file + description: mpileup file + pattern: "*.{mpileup}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@joseespinosa" +maintainers: + - "@drpatelh" + - "@joseespinosa" diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml new file mode 100644 index 0000000000..62054fc97a --- /dev/null +++ b/modules/nf-core/samtools/stats/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/modules/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf similarity index 58% rename from modules/nf-core/modules/samtools/stats/main.nf rename to modules/nf-core/samtools/stats/main.nf index f6fe3bfef8..4443948b72 100644 --- a/modules/nf-core/modules/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -1,15 +1,15 @@ process SAMTOOLS_STATS { tag "$meta.id" - 
label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(input), path(input_index) - path fasta + tuple val(meta2), path(fasta) output: tuple val(meta), path("*.stats"), emit: stats @@ -19,15 +19,26 @@ process SAMTOOLS_STATS { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--reference ${fasta}" : "" """ samtools \\ stats \\ - --threads ${task.cpus-1} \\ + --threads ${task.cpus} \\ ${reference} \\ ${input} \\ - > ${input}.stats + > ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml new file mode 100644 index 0000000000..77b020f76e --- /dev/null +++ b/modules/nf-core/samtools/stats/meta.yml @@ -0,0 +1,66 @@ +name: samtools_stats +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. 
+ homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" +output: + - stats: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.stats": + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml new file mode 100644 index 0000000000..8cae5712d5 --- /dev/null +++ b/modules/nf-core/samtools/view/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf new file mode 100644 index 0000000000..f43a4c6e72 --- /dev/null +++ b/modules/nf-core/samtools/view/main.nf @@ -0,0 +1,103 @@ +process 
SAMTOOLS_VIEW { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(index) + tuple val(meta2), path(fasta) + path qname + val index_format + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true + tuple val(meta), path("${prefix}.${file_type}.bai"), emit: bai, optional: true + tuple val(meta), path("${prefix}.${file_type}.csi"), emit: csi, optional: true + tuple val(meta), path("${prefix}.${file_type}.crai"), emit: crai, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}"), emit: unselected, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}.{csi,crai}"), emit: unselected_index, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + + output_file = index_format ? "${prefix}.${file_type}##idx##${prefix}.${file_type}.${index_format} --write-index" : "${prefix}.${file_type}" + // Can't choose index type of unselected file + readnames = qname ? "--qname-file ${qname} --output-unselected ${prefix}.unselected.${file_type}": "" + + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ if (index_format) { + if (!index_format.matches('bai|csi|crai')) { + error "Index format not one of bai, csi, crai." + } else if (file_type == "sam") { + error "Indexing not compatible with SAM output" + } + } + """ + samtools \\ + view \\ + --threads ${task.cpus-1} \\ + ${reference} \\ + ${readnames} \\ + $args \\ + -o ${output_file} \\ + $input \\ + $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + default_index_format = + file_type == "bam" ? "csi" : + file_type == "cram" ? "crai" : "" + index = index_format ? "touch ${prefix}.${file_type}.${index_format}" : args.contains("--write-index") ? "touch ${prefix}.${file_type}.${default_index_format}" : "" + unselected = qname ? "touch ${prefix}.unselected.${file_type}" : "" + // Can't choose index type of unselected file + unselected_index = qname && (args.contains("--write-index") || index_format) ? "touch ${prefix}.unselected.${file_type}.${default_index_format}" : "" + + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + if (index_format) { + if (!index_format.matches('bai|csi|crai')) { + error "Index format not one of bai, csi, crai." + } else if (file_type == "sam") { + error "Indexing not compatible with SAM output." 
+ } + } + """ + touch ${prefix}.${file_type} + ${index} + ${unselected} + ${unselected_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml new file mode 100644 index 0000000000..28c268a657 --- /dev/null +++ b/modules/nf-core/samtools/view/meta.yml @@ -0,0 +1,145 @@ +name: samtools_view +description: filter/convert SAM/BAM/CRAM file +keywords: + - view + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index: + type: file + description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) + pattern: "*.{.bai,.csi,.crai}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - - qname: + type: file + description: Optional file with read names to output only select alignments + pattern: "*.{txt,list}" + - - index_format: + type: string + description: Index format, used together with ext.args = '--write-index' + pattern: "bai|csi|crai" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: optional filtered/converted BAM file + pattern: "*.{bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: optional filtered/converted CRAM file + pattern: "*.{cram}" + - sam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.sam: + type: file + description: optional filtered/converted SAM file + pattern: "*.{sam}" + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.bai: + type: file + description: optional BAM file index + pattern: "*.{bai}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.csi: + type: file + description: optional tabix BAM file index + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.crai: + type: file + description: optional CRAM file index + pattern: "*.{crai}" + - unselected: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}: + type: file + description: optional file with unselected alignments + pattern: "*.unselected.{bam,cram,sam}" + - unselected_index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}.{csi,crai}: + type: file + description: index for the "unselected" file + pattern: "*.unselected.{csi,crai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" +maintainers: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/sentieon/applyvarcal/environment.yml b/modules/nf-core/sentieon/applyvarcal/environment.yml new file mode 100644 index 0000000000..dae76d1b6d --- /dev/null +++ b/modules/nf-core/sentieon/applyvarcal/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::sentieon=202503.01 diff --git a/modules/nf-core/sentieon/applyvarcal/main.nf b/modules/nf-core/sentieon/applyvarcal/main.nf new file mode 100644 index 0000000000..8f4ad60adf --- /dev/null +++ b/modules/nf-core/sentieon/applyvarcal/main.nf @@ -0,0 +1,62 @@ +process SENTIEON_APPLYVARCAL { + tag "${meta.id}" + label 'process_low' + label 'sentieon' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/0f/0f1dfe59ef66d7326b43db9ab1f39ce6220b358a311078c949a208f9c9815d4e/data' + : 'community.wave.seqera.io/library/sentieon:202503.01--1863def31ed8e4d5'}" + + input: + tuple val(meta), path(vcf), path(vcf_tbi), path(recal), path(recal_index), path(tranches) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.tbi"), emit: tbi + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_applyvarcal" + def sentieonLicense = secrets.SENTIEON_LICENSE_BASE64 + ? "export SENTIEON_LICENSE=\$(mktemp);echo -e \"${secrets.SENTIEON_LICENSE_BASE64}\" | base64 -d > \$SENTIEON_LICENSE; " + : "" + """ + ${sentieonLicense} + + sentieon driver \\ + -r ${fasta} \\ + -t ${task.cpus} \\ + ${args} \\ + --algo ApplyVarCal \\ + -v ${vcf} \\ + --recal ${recal} \\ + --tranches_file ${tranches} \\ + ${args2} \\ + ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}_applyvarcal" + """ + echo | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/applyvarcal/meta.yml b/modules/nf-core/sentieon/applyvarcal/meta.yml new file mode 100644 index 0000000000..9e4a21554b --- /dev/null +++ b/modules/nf-core/sentieon/applyvarcal/meta.yml @@ -0,0 +1,109 @@ +name: sentieon_applyvarcal +description: | + Apply a score cutoff to filter variants based on a recalibration table. 
+ Sentieon's ApplyVarCal performs the second pass in a two-stage process called Variant Quality Score Recalibration (VQSR). + Specifically, it applies filtering to the input variants based on the recalibration table produced + in the previous step VarCal and a target sensitivity value. + https://support.sentieon.com/manual/usages/general/#applyvarcal-algorithm +keywords: + - sentieon + - applyvarcal + - varcal + - VQSR +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: file + description: VCF file to be recalibrated, this should be the same file as used + for the first stage VariantRecalibrator. + pattern: "*.vcf" + ontologies: [] + - vcf_tbi: + type: file + description: tabix index for the input vcf file. + pattern: "*.vcf.tbi" + ontologies: [] + - recal: + type: file + description: Recalibration file produced when the input vcf was run through + VariantRecalibrator in stage 1. + pattern: "*.recal" + ontologies: [] + - recal_index: + type: file + description: Index file for the recalibration file. + pattern: "*.recal.idx" + ontologies: [] + - tranches: + type: file + description: Tranches file produced when the input vcf was run through VariantRecalibrator + in stage 1. + pattern: "*.tranches" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test'] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - "*.vcf.gz": + type: file + description: compressed vcf file containing the recalibrated variants. + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - "*.tbi": + type: file + description: Index of recalibrated vcf file. + pattern: "*vcf.gz.tbi" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions. + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@assp8200" +maintainers: + - "@assp8200" diff --git a/modules/nf-core/sentieon/bwamem/environment.yml b/modules/nf-core/sentieon/bwamem/environment.yml new file mode 100644 index 0000000000..dae76d1b6d --- /dev/null +++ b/modules/nf-core/sentieon/bwamem/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::sentieon=202503.01 diff --git a/modules/nf-core/sentieon/bwamem/main.nf b/modules/nf-core/sentieon/bwamem/main.nf new file mode 100644 index 0000000000..ae7ed2f7ba --- /dev/null +++ b/modules/nf-core/sentieon/bwamem/main.nf @@ -0,0 +1,70 @@ +process SENTIEON_BWAMEM { + tag "${meta.id}" + label 'process_high' + label 'sentieon' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/0f/0f1dfe59ef66d7326b43db9ab1f39ce6220b358a311078c949a208f9c9815d4e/data' + : 'community.wave.seqera.io/library/sentieon:202503.01--1863def31ed8e4d5'}" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(index) + tuple val(meta3), path(fasta) + tuple val(meta4), path(fasta_fai) + + output: + tuple val(meta), path("${prefix}"), path("${prefix}.{bai,crai}"), emit: bam_and_bai + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}.bam" + def sentieonLicense = secrets.SENTIEON_LICENSE_BASE64 + ? "export SENTIEON_LICENSE=\$(mktemp);echo -e \"${secrets.SENTIEON_LICENSE_BASE64}\" | base64 -d > \$SENTIEON_LICENSE; " + : "" + + """ + ${sentieonLicense} + export bwt_max_mem="${(task.memory * 0.9).toGiga()}G" + + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + sentieon bwa mem \\ + ${args} \\ + -t ${task.cpus} \\ + \$INDEX \\ + ${reads} \\ + | sentieon util sort -r ${fasta} -t ${task.cpus} -o ${prefix} --sam2bam - + + # Delete *.bai file if prefix ends with .cram + if [[ "${prefix}" == *.cram ]]; then + rm -f "${prefix}.bai" + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(sentieon bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.bam" + index = prefix.tokenize('.')[-1] == "bam" ? 
"bai" : "crai" + + """ + touch ${prefix} + touch ${prefix}.${index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(sentieon bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/bwamem/meta.yml b/modules/nf-core/sentieon/bwamem/meta.yml new file mode 100644 index 0000000000..3f4f99e5cb --- /dev/null +++ b/modules/nf-core/sentieon/bwamem/meta.yml @@ -0,0 +1,87 @@ +name: sentieon_bwamem +description: Performs fastq alignment to a fasta reference using Sentieon's BWA MEM +keywords: + - mem + - bwa + - alignment + - map + - fastq + - bam + - sentieon +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: Genome fastq files (single-end or paired-end) + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - index: + type: file + description: BWA genome index files + pattern: "*.{amb,ann,bwt,pac,sa}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta}" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test', single_end:false ] + - fasta_fai: + type: file + description: The index of the FASTA reference. + pattern: "*.fai" + ontologies: [] +output: + bam_and_bai: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - ${prefix}: + type: file + description: Alignment file (BAM, or CRAM if the prefix ends with .cram). + pattern: "*.{bam,cram}" + ontologies: [] + - ${prefix}.{bai,crai}: + type: file + description: Index of the alignment file (BAI or CRAI). + pattern: "*.{bai,crai}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@asp8200" +maintainers: + - "@asp8200" + - "@DonFreed" diff --git a/modules/nf-core/sentieon/dedup/environment.yml b/modules/nf-core/sentieon/dedup/environment.yml new file mode 100644 index 0000000000..dae76d1b6d --- /dev/null +++ b/modules/nf-core/sentieon/dedup/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::sentieon=202503.01 diff --git a/modules/nf-core/sentieon/dedup/main.nf b/modules/nf-core/sentieon/dedup/main.nf new file mode 100644 index 0000000000..c6e9de5683 --- /dev/null +++ b/modules/nf-core/sentieon/dedup/main.nf @@ -0,0 +1,75 @@ +process SENTIEON_DEDUP { + tag "${meta.id}" + label 'process_medium' + label 'sentieon' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/0f/0f1dfe59ef66d7326b43db9ab1f39ce6220b358a311078c949a208f9c9815d4e/data' + : 'community.wave.seqera.io/library/sentieon:202503.01--1863def31ed8e4d5'}" + + input: + tuple val(meta), path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fasta_fai) + + output: + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.bai"), emit: bai + tuple val(meta), path("*.score"), emit: score + tuple val(meta), path("*.metrics"), emit: metrics + tuple val(meta), path("*.metrics.multiqc.tsv"), emit: metrics_multiqc_tsv + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def args4 = task.ext.args4 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}.cram" + def metrics = task.ext.metrics ?: "${prefix}.metrics" + def input_list = bam.collect { "-i ${it}" }.join(' ') + def prefix_basename = prefix.substring(0, prefix.lastIndexOf(".")) + def sentieonLicense = secrets.SENTIEON_LICENSE_BASE64 + ? "export SENTIEON_LICENSE=\$(mktemp);echo -e \"${secrets.SENTIEON_LICENSE_BASE64}\" | base64 -d > \$SENTIEON_LICENSE; " + : "" + """ + ${sentieonLicense} + + sentieon driver ${args} -t ${task.cpus} ${input_list} -r ${fasta} --algo LocusCollector ${args2} --fun score_info ${prefix_basename}.score + sentieon driver ${args3} -t ${task.cpus} ${input_list} -r ${fasta} --algo Dedup ${args4} --score_info ${prefix_basename}.score --metrics ${metrics} ${prefix} + + # This following tsv-file is produced in order to get a proper tsv-file with Dedup-metrics for importing in MultiQC as "custom content". + # It should be removed once MultiQC has a module for displaying Dedup-metrics. 
+ head -3 ${metrics} > ${metrics}.multiqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}.cram" + def metrics = task.ext.metrics ?: "${prefix}.metrics" + def prefix_basename = prefix.substring(0, prefix.lastIndexOf(".")) + + """ + touch "${prefix}" + touch "${prefix}.crai" + touch "${prefix}.bai" + touch "${metrics}" + touch "${metrics}.multiqc.tsv" + touch "${prefix_basename}.score" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/dedup/meta.yml b/modules/nf-core/sentieon/dedup/meta.yml new file mode 100644 index 0000000000..1a26d54296 --- /dev/null +++ b/modules/nf-core/sentieon/dedup/meta.yml @@ -0,0 +1,146 @@ +name: sentieon_dedup +description: Runs the sentieon tool LocusCollector followed by Dedup. LocusCollector + collects read information that is used by Dedup which in turn marks or removes duplicate + reads. +keywords: + - mem + - dedup + - map + - bam + - cram + - sentieon +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file. 
+ pattern: "*.bam" + ontologies: [] + - bai: + type: file + description: BAI file + pattern: "*.bai" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - fasta_fai: + type: file + description: The index of the FASTA reference. + pattern: "*.fai" + ontologies: [] +output: + cram: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: CRAM file + pattern: "*.cram" + ontologies: [] + crai: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: CRAM index file + pattern: "*.crai" + ontologies: [] + bam: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: BAM file. + pattern: "*.bam" + ontologies: [] + bai: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAI file + pattern: "*.bai" + ontologies: [] + score: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - "*.score": + type: file + description: The score file indicates which reads LocusCollector finds are + likely duplicates. + pattern: "*.score" + ontologies: [] + metrics: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test', single_end:false ] + - "*.metrics": + type: file + description: Output file containing Dedup metrics incl. histogram data. + pattern: "*.metrics" + ontologies: [] + metrics_multiqc_tsv: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - "*.metrics.multiqc.tsv": + type: file + description: Output tsv-file containing Dedup metrics excl. histogram data. + pattern: "*.metrics.multiqc.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@asp8200" +maintainers: + - "@asp8200" diff --git a/modules/nf-core/sentieon/dnamodelapply/environment.yml b/modules/nf-core/sentieon/dnamodelapply/environment.yml new file mode 100644 index 0000000000..dae76d1b6d --- /dev/null +++ b/modules/nf-core/sentieon/dnamodelapply/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::sentieon=202503.01 diff --git a/modules/nf-core/sentieon/dnamodelapply/main.nf b/modules/nf-core/sentieon/dnamodelapply/main.nf new file mode 100644 index 0000000000..a261830316 --- /dev/null +++ b/modules/nf-core/sentieon/dnamodelapply/main.nf @@ -0,0 +1,60 @@ +process SENTIEON_DNAMODELAPPLY { + tag "${meta.id}" + label 'process_high' + label 'sentieon' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/0f/0f1dfe59ef66d7326b43db9ab1f39ce6220b358a311078c949a208f9c9815d4e/data' + : 'community.wave.seqera.io/library/sentieon:202503.01--1863def31ed8e4d5'}" + + input: + tuple val(meta), path(vcf), path(idx) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(ml_model) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.vcf.gz.tbi"), emit: tbi + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_applied" + def sentieonLicense = secrets.SENTIEON_LICENSE_BASE64 + ? "export SENTIEON_LICENSE=\$(mktemp);echo -e \"${secrets.SENTIEON_LICENSE_BASE64}\" | base64 -d > \$SENTIEON_LICENSE; " + : "" + """ + ${sentieonLicense} + + sentieon driver \\ + -t ${task.cpus} \\ + -r ${fasta} \\ + ${args} \\ + --algo DNAModelApply \\ + --model ${ml_model} \\ + -v ${vcf} \\ + ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}_applied" + """ + echo | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/dnamodelapply/meta.yml b/modules/nf-core/sentieon/dnamodelapply/meta.yml new file mode 100644 index 0000000000..8338129ba4 --- /dev/null +++ b/modules/nf-core/sentieon/dnamodelapply/meta.yml @@ -0,0 +1,95 @@ +name: sentieon_dnamodelapply +description: modifies the input VCF file by adding the MLrejected FILTER to the variants +keywords: + - dnamodelapply + - vcf + - filter + - sentieon +tools: + - sentieon: + description: | + Sentieon® 
provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - vcf: + type: file + description: INPUT VCF file + pattern: "*.{vcf,vcf.gz}" + ontologies: [] + - idx: + type: file + description: Index of the input VCF file + pattern: "*.{tbi}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - fai: + type: file + description: Index of the genome fasta file + pattern: "*.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - ml_model: + type: file + description: machine learning model file + pattern: "*.model" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.vcf.gz": + type: file + description: Compressed output VCF file with the MLrejected FILTER added + pattern: "*.vcf.gz" + ontologies: [] + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - "*.vcf.gz.tbi": + type: file + description: Index of the output VCF file + pattern: "*.vcf.gz.tbi" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/sentieon/dnascope/environment.yml b/modules/nf-core/sentieon/dnascope/environment.yml new file mode 100644 index 0000000000..dae76d1b6d --- /dev/null +++ b/modules/nf-core/sentieon/dnascope/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::sentieon=202503.01 diff --git a/modules/nf-core/sentieon/dnascope/main.nf b/modules/nf-core/sentieon/dnascope/main.nf new file mode 100644 index 0000000000..49d99f320b --- /dev/null +++ b/modules/nf-core/sentieon/dnascope/main.nf @@ -0,0 +1,88 @@ +process SENTIEON_DNASCOPE { + tag "${meta.id}" + label 'process_high' + label 'sentieon' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/0f/0f1dfe59ef66d7326b43db9ab1f39ce6220b358a311078c949a208f9c9815d4e/data' + : 'community.wave.seqera.io/library/sentieon:202503.01--1863def31ed8e4d5'}" + + input: + tuple val(meta), path(bam), path(bai), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dbsnp) + tuple val(meta5), path(dbsnp_tbi) + tuple val(meta6), path(ml_model) + val pcr_indel_model + val emit_vcf + val emit_gvcf + + output: + // added the substring ".unfiltered" in the filename of the vcf-files since without that the g.vcf.gz-files were ending up in the vcf-channel + tuple val(meta), path("*.unfiltered.vcf.gz"), emit: vcf, optional: true + tuple val(meta), path("*.unfiltered.vcf.gz.tbi"), emit: vcf_tbi, optional: true + // these output-files have to have the extension ".vcf.gz", otherwise the subsequent GATK-MergeVCFs will fail. + tuple val(meta), path("*.g.vcf.gz"), emit: gvcf, optional: true + tuple val(meta), path("*.g.vcf.gz.tbi"), emit: gvcf_tbi, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + // (above) args: options for the sentieon driver + def args2 = task.ext.args2 ?: '' + // (above) args2: options for the vcf generation + def args3 = task.ext.args3 ?: '' + // (above) args3: options for the gvcf generation + def interval = intervals ? "--interval ${intervals}" : '' + def dbsnp_cmd = dbsnp ? "-d ${dbsnp}" : '' + def model_cmd = ml_model ? " --model ${ml_model}" : '' + def pcr_indel_model_cmd = pcr_indel_model ? 
" --pcr_indel_model ${pcr_indel_model}" : '' + def prefix = task.ext.prefix ?: "${meta.id}" + def vcf_cmd = "" + def gvcf_cmd = "" + def base_cmd = '--algo DNAscope ' + dbsnp_cmd + ' ' + + if (emit_vcf) { + // emit_vcf can be the empty string, 'variant', 'confident' or 'all' but NOT 'gvcf' + vcf_cmd = base_cmd + args2 + ' ' + model_cmd + pcr_indel_model_cmd + ' --emit_mode ' + emit_vcf + ' ' + prefix + '.unfiltered.vcf.gz' + } + + if (emit_gvcf) { + // emit_gvcf can be either true or false + gvcf_cmd = base_cmd + args3 + ' ' + model_cmd + pcr_indel_model_cmd + ' --emit_mode gvcf ' + prefix + '.g.vcf.gz' + } + + def sentieonLicense = secrets.SENTIEON_LICENSE_BASE64 + ? "export SENTIEON_LICENSE=\$(mktemp);echo -e \"${secrets.SENTIEON_LICENSE_BASE64}\" | base64 -d > \$SENTIEON_LICENSE; " + : "" + """ + ${sentieonLicense} + + sentieon driver ${args} -r ${fasta} -t ${task.cpus} -i ${bam} ${interval} ${vcf_cmd} ${gvcf_cmd} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def gvcf_cmd = emit_gvcf ? "echo | gzip > ${prefix}.g.vcf.gz; touch ${prefix}.g.vcf.gz.tbi" : "" + + """ + echo | gzip > ${prefix}.unfiltered.vcf.gz + touch ${prefix}.unfiltered.vcf.gz.tbi + ${gvcf_cmd} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/dnascope/meta.yml b/modules/nf-core/sentieon/dnascope/meta.yml new file mode 100644 index 0000000000..767da593f9 --- /dev/null +++ b/modules/nf-core/sentieon/dnascope/meta.yml @@ -0,0 +1,156 @@ +name: sentieon_dnascope +description: DNAscope algorithm performs an improved version of Haplotype variant + calling. 
+keywords: + - dnascope + - sentieon + - variant_calling +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information. + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file. + pattern: "*.bam" + ontologies: [] + - bai: + type: file + description: BAI file + pattern: "*.bai" + ontologies: [] + - intervals: + type: file + description: bed or interval_list file containing interval in the reference + that will be used in the analysis + pattern: "*.{bed,interval_list}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing meta information for fasta. + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing meta information for fasta index. + - fai: + type: file + description: Index of the genome fasta file + pattern: "*.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing meta information for dbsnp. + - dbsnp: + type: file + description: Single Nucleotide Polymorphism database (dbSNP) file + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - - meta5: + type: map + description: | + Groovy Map containing meta information for dbsnp_tbi. 
+ - dbsnp_tbi: + type: file + description: Index of the Single Nucleotide Polymorphism database (dbSNP) file + pattern: "*.vcf.gz.tbi" + ontologies: [] + - - meta6: + type: map + description: | + Groovy Map containing meta information for machine learning model for DNAscope. + - ml_model: + type: file + description: machine learning model file + pattern: "*.model" + ontologies: [] + - pcr_indel_model: + type: string + description: | + Controls the option pcr_indel_model for DNAscope. + The possible options are "NONE" (used for PCR free samples), and "HOSTILE", "AGGRESSIVE" and "CONSERVATIVE". + See Sentieon's documentation for further explanation. + - emit_vcf: + type: string + description: | + Controls the vcf output from DNAscope. + Possible options are "all", "confident" and "variant". + See Sentieon's documentation for further explanation. + - emit_gvcf: + type: boolean + description: If true, DNAscope will output a gvcf +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - "*.unfiltered.vcf.gz": + type: file + description: Compressed VCF file + pattern: "*.unfiltered.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + vcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - "*.unfiltered.vcf.gz.tbi": + type: file + description: Index of VCF file + pattern: "*.unfiltered.vcf.gz.tbi" + ontologies: [] + gvcf: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - "*.g.vcf.gz": + type: file + description: Compressed GVCF file + pattern: "*.g.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + gvcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test', single_end:false ] + - "*.g.vcf.gz.tbi": + type: file + description: Index of GVCF file + pattern: "*.g.vcf.gz.tbi" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/sentieon/gvcftyper/environment.yml b/modules/nf-core/sentieon/gvcftyper/environment.yml new file mode 100644 index 0000000000..dae76d1b6d --- /dev/null +++ b/modules/nf-core/sentieon/gvcftyper/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::sentieon=202503.01 diff --git a/modules/nf-core/sentieon/gvcftyper/main.nf b/modules/nf-core/sentieon/gvcftyper/main.nf new file mode 100644 index 0000000000..1f97866d3f --- /dev/null +++ b/modules/nf-core/sentieon/gvcftyper/main.nf @@ -0,0 +1,55 @@ +process SENTIEON_GVCFTYPER { + tag "${meta.id}" + label 'process_high' + label 'sentieon' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/0f/0f1dfe59ef66d7326b43db9ab1f39ce6220b358a311078c949a208f9c9815d4e/data' + : 'community.wave.seqera.io/library/sentieon:202503.01--1863def31ed8e4d5'}" + + input: + tuple val(meta), path(gvcfs), path(tbis), path(intervals) + tuple val(meta1), path(fasta) + tuple val(meta2), path(fai) + tuple val(meta3), path(dbsnp) + tuple val(meta4), path(dbsnp_tbi) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf_gz + tuple val(meta), path("*.vcf.gz.tbi"), emit: vcf_gz_tbi + path ("versions.yml"), emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def gvcfs_input = '-v ' + gvcfs.join(' -v ') + def dbsnp_cmd = dbsnp ? "--dbsnp ${dbsnp}" : "" + def sentieonLicense = secrets.SENTIEON_LICENSE_BASE64 + ? "export SENTIEON_LICENSE=\$(mktemp);echo -e \"${secrets.SENTIEON_LICENSE_BASE64}\" | base64 -d > \$SENTIEON_LICENSE; " + : "" + """ + ${sentieonLicense} + + sentieon driver -r ${fasta} --algo GVCFtyper ${gvcfs_input} ${dbsnp_cmd} ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip >${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/gvcftyper/meta.yml b/modules/nf-core/sentieon/gvcftyper/meta.yml new file mode 100644 index 0000000000..a45971ea08 --- /dev/null +++ b/modules/nf-core/sentieon/gvcftyper/meta.yml @@ -0,0 +1,114 @@ +name: sentieon_gvcftyper +description: | + Perform joint genotyping on one or more samples pre-called with Sentieon's Haplotyper. 
+keywords: + - joint genotyping + - genotype + - gvcf +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - gvcfs: + type: file + description: | + gVCF(.gz) file + pattern: "*.{vcf,vcf.gz}" + ontologies: [] + - tbis: + type: file + description: | + index of gvcf file + pattern: "*.tbi" + ontologies: [] + - intervals: + type: file + description: Interval file with the genomic regions included in the library + (optional) + ontologies: [] + - - meta1: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Reference fasta file + pattern: "*.fasta" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fai: + type: file + description: Reference fasta index file + pattern: "*.fai" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - dbsnp: + type: file + description: dbSNP VCF file + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - dbsnp_tbi: + type: file + description: dbSNP VCF index file + pattern: "*.tbi" + ontologies: [] +output: + vcf_gz: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz": + type: file + description: VCF file + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + vcf_gz_tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz.tbi": + type: file + description: VCF index file + pattern: "*.vcf.gz.tbi" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@asp8200" +maintainers: + - "@asp8200" diff --git a/modules/nf-core/sentieon/haplotyper/environment.yml b/modules/nf-core/sentieon/haplotyper/environment.yml new file mode 100644 index 0000000000..dae76d1b6d --- /dev/null +++ b/modules/nf-core/sentieon/haplotyper/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::sentieon=202503.01 diff --git a/modules/nf-core/sentieon/haplotyper/main.nf b/modules/nf-core/sentieon/haplotyper/main.nf new file mode 100644 index 0000000000..80cd4d0de9 --- /dev/null +++ b/modules/nf-core/sentieon/haplotyper/main.nf @@ -0,0 +1,93 @@ +process SENTIEON_HAPLOTYPER { + tag "${meta.id}" + label 'process_medium' + label 'sentieon' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/0f/0f1dfe59ef66d7326b43db9ab1f39ce6220b358a311078c949a208f9c9815d4e/data' + : 'community.wave.seqera.io/library/sentieon:202503.01--1863def31ed8e4d5'}" + + input: + tuple val(meta), path(input), path(input_index), path(intervals), path(recal_table) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dbsnp) + tuple val(meta5), path(dbsnp_tbi) + val emit_vcf + val emit_gvcf + + output: + // added the substring ".unfiltered" in the filename of the vcf-files since without that the g.vcf.gz-files were ending up in the vcf-channel + tuple val(meta), path("*.unfiltered.vcf.gz"), emit: vcf, optional: true + tuple val(meta), path("*.unfiltered.vcf.gz.tbi"), emit: vcf_tbi, optional: true + // these output-files have to have the extension ".vcf.gz", otherwise the subsequent GATK-MergeVCFs will fail. + tuple val(meta), path("*.g.vcf.gz"), emit: gvcf, optional: true + tuple val(meta), path("*.g.vcf.gz.tbi"), emit: gvcf_tbi, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' // options for the driver + def args2 = task.ext.args2 ?: '' // options for the vcf generation + def args3 = task.ext.args3 ?: '' // options for the gvcf generation + def prefix = task.ext.prefix ?: "${meta.id}" + def input_list = input instanceof List ? input.collect { "-i ${it}" }.join(' ') : "-i ${input}" + def dbsnp_command = dbsnp ? "-d ${dbsnp} " : "" + def interval_command = intervals ? "--interval ${intervals}" : "" + def recal_table_command = recal_table ? "-q ${recal_table}" : "" + def base_cmd = '--algo Haplotyper ' + dbsnp_command + + // The Sentieon --algo Haplotyper can create a VCF or gVCF but not both + // Luckily, we can run it twice while reading the BAM once, therefore we construct the two separate commands + // and run them twice while using the sentieon driver once. 
This allows us to create both types of VCF indels + // one process + + // Create VCF command to export a VCF + def vcf_cmd = emit_vcf + ? base_cmd + args2 + ' --emit_mode ' + emit_vcf + ' ' + prefix + '.unfiltered.vcf.gz' + : "" + + // Create a gVCF command to export a gVCF + def gvcf_cmd = emit_gvcf + ? base_cmd + args3 + ' --emit_mode gvcf ' + prefix + '.g.vcf.gz' + : "" + + def sentieonLicense = secrets.SENTIEON_LICENSE_BASE64 + ? "export SENTIEON_LICENSE=\$(mktemp);echo -e \"${secrets.SENTIEON_LICENSE_BASE64}\" | base64 -d > \$SENTIEON_LICENSE; " + : "" + """ + ${sentieonLicense} + + sentieon driver \\ + ${args} \\ + -r ${fasta} \\ + -t ${task.cpus} \\ + ${interval_command} \\ + ${input_list} \\ + ${recal_table_command} \\ + ${vcf_cmd} \\ + ${gvcf_cmd} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.unfiltered.vcf.gz + touch ${prefix}.unfiltered.vcf.gz.tbi + echo "" | gzip > ${prefix}.g.vcf.gz + touch ${prefix}.g.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/haplotyper/meta.yml b/modules/nf-core/sentieon/haplotyper/meta.yml new file mode 100644 index 0000000000..f63eff8c37 --- /dev/null +++ b/modules/nf-core/sentieon/haplotyper/meta.yml @@ -0,0 +1,144 @@ +name: sentieon_haplotyper +description: Runs Sentieon's haplotyper for germline variant calling. +keywords: + - sentieon + - haplotypecaller + - haplotype +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. 
+ Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + ontologies: [] + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + ontologies: [] + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + ontologies: [] + - recal_table: + type: file + description: Recalibration table from sentieon/qualcal (optional) + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fai: + type: file + description: The index of the FASTA reference. + pattern: "*.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - dbsnp: + type: file + description: VCF file containing known sites (optional) + ontologies: [] + - - meta5: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - dbsnp_tbi: + type: file + description: VCF index of dbsnp (optional) + ontologies: [] + - emit_vcf: + type: string + description: | + Controls the vcf output from the haplotyper. + If emit_vcf is set to "all" then the haplotyper will output a vcf generated by the haplotyper in emit-mode "all". 
+ If emit_vcf is set to "confident" then the haplotyper will output a vcf generated by the haplotyper in emit-mode "confident". + If emit_vcf is set to "variant" then the haplotyper will output a vcf generated by the haplotyper in emit-mode "variant". + - emit_gvcf: + type: boolean + description: If true, the haplotyper will output a gvcf +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - "*.unfiltered.vcf.gz": + type: file + description: Compressed VCF file + pattern: "*.unfiltered.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + vcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - "*.unfiltered.vcf.gz.tbi": + type: file + description: Index of VCF file + pattern: "*.unfiltered.vcf.gz.tbi" + ontologies: [] + gvcf: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - "*.g.vcf.gz": + type: file + description: Compressed GVCF file + pattern: "*.g.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + gvcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test', single_end:false ] + - "*.g.vcf.gz.tbi": + type: file + description: Index of GVCF file + pattern: "*.g.vcf.gz.tbi" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@asp8200" +maintainers: + - "@asp8200" diff --git a/modules/nf-core/sentieon/tnscope/environment.yml b/modules/nf-core/sentieon/tnscope/environment.yml new file mode 100644 index 0000000000..dae76d1b6d --- /dev/null +++ b/modules/nf-core/sentieon/tnscope/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::sentieon=202503.01 diff --git a/modules/nf-core/sentieon/tnscope/main.nf b/modules/nf-core/sentieon/tnscope/main.nf new file mode 100644 index 0000000000..cdce359d31 --- /dev/null +++ b/modules/nf-core/sentieon/tnscope/main.nf @@ -0,0 +1,76 @@ +process SENTIEON_TNSCOPE { + tag "${meta.id}" + label 'process_high' + label 'sentieon' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/0f/0f1dfe59ef66d7326b43db9ab1f39ce6220b358a311078c949a208f9c9815d4e/data' + : 'community.wave.seqera.io/library/sentieon:202503.01--1863def31ed8e4d5'}" + + input: + tuple val(meta), path(input), path(input_index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dbsnp) + tuple val(meta5), path(dbsnp_tbi) + tuple val(meta6), path(pon) + tuple val(meta7), path(pon_tbi) + tuple val(meta8), path(cosmic) + tuple val(meta9), path(cosmic_tbi) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.vcf.gz.tbi"), emit: index + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def interval_str = intervals ? "--interval ${intervals}" : '' + def cosmic_str = cosmic ? "--cosmic ${cosmic}" : '' + def dbsnp_str = dbsnp ? "--dbsnp ${dbsnp}" : '' + def pon_str = pon ? "--pon ${pon}" : '' + def prefix = task.ext.prefix ?: "${meta.id}" + def inputs = input.collect { "-i ${it}" }.join(" ") + def sentieonLicense = secrets.SENTIEON_LICENSE_BASE64 + ? 
"export SENTIEON_LICENSE=\$(mktemp);echo -e \"${secrets.SENTIEON_LICENSE_BASE64}\" | base64 -d > \$SENTIEON_LICENSE; " + : "" + """ + ${sentieonLicense} + + + sentieon driver \\ + -t ${task.cpus} \\ + -r ${fasta} \\ + ${inputs} \\ + ${interval_str} \\ + ${args} \\ + --algo TNscope \\ + ${args2} \\ + ${cosmic_str} \\ + ${dbsnp_str} \\ + ${pon_str} \\ + ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/tnscope/meta.yml b/modules/nf-core/sentieon/tnscope/meta.yml new file mode 100644 index 0000000000..7b6bbc402a --- /dev/null +++ b/modules/nf-core/sentieon/tnscope/meta.yml @@ -0,0 +1,156 @@ +name: sentieon_tnscope +description: TNscope algorithm performs somatic variant calling on the tumor-normal + matched pair or the tumor only data, using a Haplotyper algorithm. +keywords: + - tnscope + - sentieon + - variant_calling +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information. + e.g. [ id:'test' ] + - input: + type: file + description: One or more BAM or CRAM files. 
+ pattern: "*.{bam,cram}" + ontologies: [] + - input_index: + type: file + description: Indices for the input files + pattern: "*.{bai,crai}" + ontologies: [] + - intervals: + type: file + description: bed or interval_list file containing interval in the reference + that will be used in the analysis. Only recommended for large WGS data, else + the overhead may not be worth the additional parallelisation. + pattern: "*.{bed,interval_list}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test' ] + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test' ] + - fai: + type: file + description: Index of the genome fasta file + pattern: "*.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test' ] + - dbsnp: + type: file + description: Single Nucleotide Polymorphism database (dbSNP) file + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - - meta5: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test' ] + - dbsnp_tbi: + type: file + description: Index of the Single Nucleotide Polymorphism database (dbSNP) file + pattern: "*.vcf.gz.tbi" + ontologies: [] + - - meta6: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test' ] + - pon: + type: file + description: Panel of normals (PoN) VCF file + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - - meta7: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test' ] + - pon_tbi: + type: file + description: Index of the panel of normals (PoN) VCF file + pattern: "*.vcf.gz.tbi" + ontologies: [] + - - meta8: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test' ] + - cosmic: + type: file + description: Catalogue of Somatic Mutations in Cancer (COSMIC) VCF file. + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - - meta9: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test' ] + - cosmic_tbi: + type: file + description: Index of the Catalogue of Somatic Mutations in Cancer (COSMIC) + VCF file. + pattern: "*.vcf.gz.tbi" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test' ] + - "*.vcf.gz": + type: file + description: VCF file + pattern: "*.{vcf.gz}" + ontologies: [] + index: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test' ] + - "*.vcf.gz.tbi": + type: file + description: Index of the VCF file + pattern: "*.vcf.gz.tbi" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/sentieon/varcal/environment.yml b/modules/nf-core/sentieon/varcal/environment.yml new file mode 100644 index 0000000000..dae76d1b6d --- /dev/null +++ b/modules/nf-core/sentieon/varcal/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::sentieon=202503.01 diff --git a/modules/nf-core/sentieon/varcal/main.nf b/modules/nf-core/sentieon/varcal/main.nf new file mode 100644 index 0000000000..f9f3e8ecc5 --- /dev/null +++ b/modules/nf-core/sentieon/varcal/main.nf @@ -0,0 +1,98 @@ +process SENTIEON_VARCAL { + tag "${meta.id}" + label 'process_low' + label 'sentieon' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/0f/0f1dfe59ef66d7326b43db9ab1f39ce6220b358a311078c949a208f9c9815d4e/data' + : 'community.wave.seqera.io/library/sentieon:202503.01--1863def31ed8e4d5'}" + + input: + tuple val(meta), path(vcf), path(tbi) + path resource_vcf + path resource_tbi + val labels + path fasta + path fai + + output: + tuple val(meta), path("*.recal"), emit: recal + tuple val(meta), path("*.idx"), emit: idx + tuple val(meta), path("*.tranches"), emit: tranches + tuple val(meta), path("*plots.R"), emit: plots, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + // Process labels to create the command string + // labels is a list. Here is an example of what labels might look like: + // ['--resource:dbsnp,known=false,training=true,truth=false,prior=2.0 dbsnp_146.hg38.vcf.gz', '--resource:gatk,known=false,training=true,truth=true,prior=10.0 Homo_sapiens_assembly38.known_indels.vcf.gz --resource:mills,known=false,training=true,truth=true,prior=10.0 Mills_and_1000G_gold_standard.indels.hg38.vcf.gz'] + def labels_command = '' + def labels_input = labels + if (labels_input instanceof String && !labels_input.trim().isEmpty()) { + // Process string input + def resourceStrings = labels_input.split('--resource:').findAll() + def processedResources = resourceStrings.collect { resource_string -> + def items = resource_string.split(' ', 2) + if (items.size() != 2) { + error("Expected the resource string '${resource_string}' to contain two elements separated by a space.") + } + "--resource ${items[1]} --resource_param ${items[0].replaceFirst('^--resource:', '')}" + } + labels_command = processedResources.join(' ') + } + else if (labels_input instanceof List) { + // Process list input + def processedResources = labels_input.collect { label -> + def cleanedLabel = 
label.replaceFirst('^--resource:', '') + def items = cleanedLabel.split(' ', 2) + if (items.size() != 2) { + error("Expected the resource string '${cleanedLabel}' to contain two elements separated by a space.") + } + "--resource ${items[1]} --resource_param ${items[0].replaceFirst('^--resource:', '')}" + } + labels_command = processedResources.join(' ') + } + else if (labels_input != null) { + error("Expected 'labels' to be either a String or a List, but got ${labels_input.getClass()}") + } + + def sentieonLicense = secrets.SENTIEON_LICENSE_BASE64 + ? "export SENTIEON_LICENSE=\$(mktemp);echo -e \"${secrets.SENTIEON_LICENSE_BASE64}\" | base64 -d > \$SENTIEON_LICENSE; " + : "" + """ + ${sentieonLicense} + + sentieon driver -r ${fasta} --algo VarCal \\ + -v ${vcf} \\ + --tranches_file ${prefix}.tranches \\ + ${labels_command} \\ + ${args} \\ + ${prefix}.recal + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.recal + touch ${prefix}.idx + touch ${prefix}.tranches + touch ${prefix}plots.R + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/varcal/meta.yml b/modules/nf-core/sentieon/varcal/meta.yml new file mode 100644 index 0000000000..9d7bd79ae5 --- /dev/null +++ b/modules/nf-core/sentieon/varcal/meta.yml @@ -0,0 +1,119 @@ +name: sentieon_varcal +description: | + Module for Sentieons VarCal. The VarCal algorithm calculates the Variant Quality Score Recalibration (VQSR). + VarCal builds a recalibration model for scoring variant quality. 
+ https://support.sentieon.com/manual/usages/general/#varcal-algorithm +keywords: + - sentieon + - varcal + - variant recalibration +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - vcf: + type: file + description: input vcf file containing the variants to be recalibrated + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - tbi: + type: file + description: tbi file matching with -vcf + pattern: "*.vcf.gz.tbi" + ontologies: [] + - resource_vcf: + type: file + description: all resource vcf files that are used with the corresponding '--resource' + label + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - resource_tbi: + type: file + description: all resource tbi files that are used with the corresponding '--resource' + label + pattern: "*.vcf.gz.tbi" + ontologies: [] + - labels: + type: string + description: necessary arguments for Sentieon's VarCal. Specified to directly + match the resources provided. 
More information can be found at https://support.sentieon.com/manual/usages/general/#varcal-algorithm + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + ontologies: [] + - fai: + type: file + description: Index of reference fasta file + pattern: "fasta.fai" + ontologies: [] +output: + recal: + - - meta: + type: file + description: Output recal file used by ApplyVQSR + pattern: "*.recal" + ontologies: [] + - "*.recal": + type: file + description: Output recal file used by ApplyVQSR + pattern: "*.recal" + ontologies: [] + idx: + - - meta: + type: file + description: Output recal file used by ApplyVQSR + pattern: "*.recal" + ontologies: [] + - "*.idx": + type: file + description: Index file for the recal output file + pattern: "*.idx" + ontologies: [] + tranches: + - - meta: + type: file + description: Output recal file used by ApplyVQSR + pattern: "*.recal" + ontologies: [] + - "*.tranches": + type: file + description: Output tranches file used by ApplyVQSR + pattern: "*.tranches" + ontologies: [] + plots: + - - meta: + type: file + description: Output recal file used by ApplyVQSR + pattern: "*.recal" + ontologies: [] + - "*plots.R": + type: file + description: Optional output rscript file to aid in visualization of the input + data and learned model. 
+ pattern: "*plots.R" + ontologies: + - edam: http://edamontology.org/format_3999 # R script + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@asp8200" +maintainers: + - "@asp8200" diff --git a/modules/nf-core/snpeff/download/environment.yml b/modules/nf-core/snpeff/download/environment.yml new file mode 100644 index 0000000000..3659adcc0e --- /dev/null +++ b/modules/nf-core/snpeff/download/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/snpeff + - bioconda::snpeff=5.4.0a diff --git a/modules/nf-core/snpeff/download/main.nf b/modules/nf-core/snpeff/download/main.nf new file mode 100644 index 0000000000..d1d3b35993 --- /dev/null +++ b/modules/nf-core/snpeff/download/main.nf @@ -0,0 +1,44 @@ +process SNPEFF_DOWNLOAD { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/30/30669e5208952f30d59d0d559928772f082830d01a140a853fff13a2283a17b0/data' + : 'community.wave.seqera.io/library/snpeff:5.4.0a--eaf6ce30125b2b17'}" + + input: + tuple val(meta), val(snpeff_db) + + output: + tuple val(meta), path('snpeff_cache'), emit: cache + tuple val("${task.process}"), val('snpeff'), eval("snpEff -version 2>&1 | cut -f 2 -d '\t'"), topic: versions, emit: versions_snpeff + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def avail_mem = 6144 + if (!task.memory) { + log.info('[snpEff] Available memory not known - defaulting to 6GB. 
Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + snpEff \\ + -Xmx${avail_mem}M \\ + download ${snpeff_db} \\ + -dataDir \${PWD}/snpeff_cache \\ + ${args} + """ + + stub: + """ + mkdir -p snpeff_cache/${snpeff_db} + + touch snpeff_cache/${snpeff_db}/sequence.I.bin + touch snpeff_cache/${snpeff_db}/sequence.bin + """ +} diff --git a/modules/nf-core/snpeff/download/meta.yml b/modules/nf-core/snpeff/download/meta.yml new file mode 100644 index 0000000000..1f1ae1f426 --- /dev/null +++ b/modules/nf-core/snpeff/download/meta.yml @@ -0,0 +1,64 @@ +name: snpeff_download +description: Genetic variant annotation and functional effect prediction toolbox +keywords: + - annotation + - effect prediction + - snpeff + - variant + - vcf +tools: + - snpeff: + description: | + SnpEff is a variant annotation and effect prediction tool. + It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). + homepage: https://pcingola.github.io/SnpEff/ + documentation: https://pcingola.github.io/SnpEff/se_introduction/ + licence: ["MIT"] + identifier: biotools:snpeff +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - snpeff_db: + type: string + description: SnpEff database name + ontologies: [] +output: + cache: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - snpeff_cache: + type: directory + description: | + Directory containing the downloaded snpEff cache + ontologies: [] + versions_snpeff: + - - ${task.process}: + type: string + description: The process the versions were collected from + - snpeff: + type: string + description: The tool name + - "snpEff -version 2>&1 | cut -f 2 -d '\t'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - snpeff: + type: string + description: The tool name + - "snpEff -version 2>&1 | cut -f 2 -d '\t'": + type: string + description: The command used to generate the version of the tool +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/snpeff/snpeff/environment.yml b/modules/nf-core/snpeff/snpeff/environment.yml new file mode 100644 index 0000000000..3659adcc0e --- /dev/null +++ b/modules/nf-core/snpeff/snpeff/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/snpeff + - bioconda::snpeff=5.4.0a diff --git a/modules/nf-core/snpeff/snpeff/main.nf b/modules/nf-core/snpeff/snpeff/main.nf new file mode 100644 index 0000000000..37d954a7dc --- /dev/null +++ b/modules/nf-core/snpeff/snpeff/main.nf @@ -0,0 +1,56 @@ +process SNPEFF_SNPEFF { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ?
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/30/30669e5208952f30d59d0d559928772f082830d01a140a853fff13a2283a17b0/data' + : 'community.wave.seqera.io/library/snpeff:5.4.0a--eaf6ce30125b2b17'}" + + input: + tuple val(meta), path(vcf) + val db + tuple val(meta2), path(cache) + + output: + tuple val(meta), path("*.ann.vcf"), emit: vcf + tuple val(meta), val("${task.process}"), val('snpeff'), path("*.csv"), topic: multiqc_files, emit: report + tuple val(meta), val("${task.process}"), val('snpeff'), path("*.html"), topic: multiqc_files, emit: summary_html + tuple val(meta), val("${task.process}"), val('snpeff'), path("*.genes.txt"), topic: multiqc_files, emit: genes_txt + tuple val("${task.process}"), val('snpeff'), eval("snpEff -version 2>&1 | cut -f 2 -d '\t'"), topic: versions, emit: versions_snpeff + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def avail_mem = 6144 + if (!task.memory) { + log.info('[snpEff] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + def prefix = task.ext.prefix ?: "${meta.id}" + def cache_command = cache ? 
"-dataDir \${PWD}/${cache}" : "" + """ + snpEff \\ + -Xmx${avail_mem}M \\ + -XX:-UsePerfData \\ + ${db} \\ + ${args} \\ + -csvStats ${prefix}.csv \\ + ${cache_command} \\ + ${vcf} \\ + > ${prefix}.ann.vcf + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.ann.vcf + touch ${prefix}.csv + touch ${prefix}.html + touch ${prefix}.genes.txt + """ +} diff --git a/modules/nf-core/snpeff/snpeff/meta.yml b/modules/nf-core/snpeff/snpeff/meta.yml new file mode 100644 index 0000000000..a33abfdad9 --- /dev/null +++ b/modules/nf-core/snpeff/snpeff/meta.yml @@ -0,0 +1,191 @@ +name: snpeff_snpeff +description: Genetic variant annotation and functional effect prediction toolbox +keywords: + - annotation + - effect prediction + - snpeff + - variant + - vcf +tools: + - snpeff: + description: | + SnpEff is a variant annotation and effect prediction tool. + It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). + homepage: https://pcingola.github.io/SnpEff/ + documentation: https://pcingola.github.io/SnpEff/se_introduction/ + licence: ["MIT"] + identifier: biotools:snpeff +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + vcf to annotate + ontologies: [] + - db: + type: string + description: | + which db to annotate with + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - cache: + type: file + description: | + path to snpEff cache (optional) + ontologies: [] +output: + vcf: + - - meta: + type: file + description: | + annotated vcf + pattern: "*.ann.vcf" + ontologies: [] + - "*.ann.vcf": + type: file + description: | + annotated vcf + pattern: "*.ann.vcf" + ontologies: [] + report: + - - meta: + type: file + description: | + annotated vcf + pattern: "*.ann.vcf" + ontologies: [] + - ${task.process}: + type: string + description: The process + - snpeff: + type: string + description: The tool name + - "*.csv": + type: file + description: snpEff report csv file + pattern: "*.csv" + ontologies: + - edam: http://edamontology.org/format_3752 # CSV + summary_html: + - - meta: + type: file + description: | + annotated vcf + pattern: "*.ann.vcf" + ontologies: [] + - ${task.process}: + type: string + description: The process + - snpeff: + type: string + description: The tool name + - "*.html": + type: file + description: snpEff summary statistics in html file + pattern: "*.html" + ontologies: [] + genes_txt: + - - meta: + type: file + description: | + annotated vcf + pattern: "*.ann.vcf" + ontologies: [] + - ${task.process}: + type: string + description: The process + - snpeff: + type: string + description: The tool name + - "*.genes.txt": + type: file + description: txt (tab separated) file having counts of the number of variants + affecting each transcript and gene + pattern: "*.genes.txt" + ontologies: [] + versions_snpeff: + - - ${task.process}: + type: string + description: The process the versions were collected from + - snpeff: + type: string + description: The tool name + - "snpEff -version 2>&1 | cut -f 2 -d '\t'": + type: string + description: The command used to generate the version of the tool +topics: + multiqc_files: + - - meta: + type: file + description: | + annotated vcf + pattern: "*.ann.vcf" + ontologies: [] + - ${task.process}: + type: string + description: The process + - snpeff: + type: 
string + description: The tool name + - "*.csv": + type: file + description: snpEff report csv file + pattern: "*.csv" + ontologies: + - edam: http://edamontology.org/format_3752 # CSV + - - meta: + type: file + description: | + annotated vcf + pattern: "*.ann.vcf" + ontologies: [] + - ${task.process}: + type: string + description: The process + - snpeff: + type: string + description: The tool name + - "*.html": + type: file + description: snpEff summary statistics in html file + pattern: "*.html" + ontologies: [] + - - meta: + type: file + description: | + annotated vcf + pattern: "*.ann.vcf" + ontologies: [] + - ${task.process}: + type: string + description: The process + - snpeff: + type: string + description: The tool name + - "*.genes.txt": + type: file + description: txt (tab separated) file having counts of the number of variants + affecting each transcript and gene + pattern: "*.genes.txt" + ontologies: [] + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - snpeff: + type: string + description: The tool name + - "snpEff -version 2>&1 | cut -f 2 -d '\t'": + type: string + description: The command used to generate the version of the tool +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/snpsift/annmem/environment.yml b/modules/nf-core/snpsift/annmem/environment.yml new file mode 100644 index 0000000000..c252c50c1b --- /dev/null +++ b/modules/nf-core/snpsift/annmem/environment.yml @@ -0,0 +1,7 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.23 + - bioconda::snpsift=5.4.0a diff --git a/modules/nf-core/snpsift/annmem/main.nf b/modules/nf-core/snpsift/annmem/main.nf new file mode 100644 index 0000000000..c852b39a89 --- /dev/null +++ b/modules/nf-core/snpsift/annmem/main.nf @@ -0,0 +1,55 @@ +process SNPSIFT_ANNMEM 
{ + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a1/a116bb44e388ca83fea78d82fe8bdfd5cf3557254e2ec7dd3f1f17354880638c/data' : + 'community.wave.seqera.io/library/htslib_snpsift:ace461dff1cfc121' }" + + input: + tuple val(meta), path(vcf), path(vcf_tbi) + tuple path(db_vcf), path(db_vcf_tbi), path(db_vardb), val(db_fields), val(db_prefixes) + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.vcf.gz.tbi"), emit: tbi + tuple val("${task.process}"), val('snpsift'), eval("SnpSift -version 2>&1 | grep -oE '[0-9]+\\.[0-9]+[a-z]?'"), emit: versions_snpsift, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def dbs = db_vcf instanceof List ? db_vcf : [db_vcf] + def all_fields = db_fields instanceof List ? db_fields : [db_fields] + def prefixes = db_prefixes instanceof List ? db_prefixes : [db_prefixes] + + // db_vardb is staged as input so it's present in the work directory; + // SnpSift finds it automatically next to the VCF as {vcf}.snpsift.vardb/ + def dbfile_args = dbs.withIndex().collect { db, i -> + def f = all_fields[i] + def fields = f instanceof List ? f.join(',') : f + def p = prefixes[i] + "-dbfile ${db}${fields ? " -fields ${fields}" : ''}${p ? 
" -prefix ${p}" : ''}" + } + + """ + SnpSift \\ + annmem \\ + ${args} \\ + ${dbfile_args.join(' \\\n ')} \\ + ${vcf} \\ + | bgzip -c > ${prefix}.vcf.gz + + tabix -p vcf ${prefix}.vcf.gz + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | bgzip -c > ${prefix}.vcf.gz + echo "" | gzip > ${prefix}.vcf.gz.tbi + """ +} diff --git a/modules/nf-core/snpsift/annmem/meta.yml b/modules/nf-core/snpsift/annmem/meta.yml new file mode 100644 index 0000000000..dd68994978 --- /dev/null +++ b/modules/nf-core/snpsift/annmem/meta.yml @@ -0,0 +1,109 @@ +name: "snpsift_annmem" +description: | + Annotate VCF files using pre-built SnpSift annMem databases. + Enriches input VCF records by querying memory-optimized indexed dataframes for high-performance annotation. +keywords: + - vcf + - annotation + - snpsift + - variant + - database +tools: + - snpsift: + description: "Genetic variant annotations and functional effect prediction toolbox" + homepage: "https://pcingola.github.io/SnpEff/snpsift/annotate_mem/" + documentation: "https://pcingola.github.io/SnpEff/snpsift/annotate_mem/" + tool_dev_url: "https://github.com/pcingola/SnpSift" + doi: "10.3389/fgene.2012.00035" + licence: + - "MIT" + identifier: biotools:snpsift +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - vcf: + type: file + description: VCF file to annotate + pattern: "*.{vcf,vcf.gz}" + ontologies: [] + - vcf_tbi: + type: file + description: Tabix index for input VCF (optional) + pattern: "*.tbi" + ontologies: [] + - - db_vcf: + type: file + description: | + VCF database file(s) for annotation. + Can provide multiple databases for multi-database annotation. 
+ pattern: "*.{vcf,vcf.gz}" + ontologies: [] + - db_vcf_tbi: + type: file + description: Tabix index for database VCF file(s) + pattern: "*.tbi" + ontologies: [] + - db_vardb: + type: directory + description: Pre-built .snpsift.vardb directory (used instead of db_vcf if provided) + pattern: "*.snpsift.vardb" + - db_fields: + type: list + description: | + INFO field names to annotate with. + Can be a list or a single string. + - db_prefixes: + type: string + description: Prefix to add to annotated field names +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - "*.vcf.gz": + type: file + description: Annotated VCF file + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - "*.vcf.gz.tbi": + type: file + description: Tabix index for annotated VCF + pattern: "*.vcf.gz.tbi" + ontologies: [] + versions_snpsift: + - - ${task.process}: + type: string + description: The name of the process + - snpsift: + type: string + description: The name of the tool + - SnpSift -version 2>&1 | grep -oE '[0-9]+\.[0-9]+[a-z]?': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - snpsift: + type: string + description: The tool name + - SnpSift -version 2>&1 | grep -oE '[0-9]+\.[0-9]+[a-z]?': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@friederike-hanssen" +maintainers: + - "@friederike-hanssen" diff --git a/modules/nf-core/snpsift/annmemcreate/environment.yml b/modules/nf-core/snpsift/annmemcreate/environment.yml new file mode 100644 index 0000000000..c252c50c1b --- /dev/null +++ b/modules/nf-core/snpsift/annmemcreate/environment.yml @@ -0,0 +1,7 @@ +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.23 + - bioconda::snpsift=5.4.0a diff --git a/modules/nf-core/snpsift/annmemcreate/main.nf b/modules/nf-core/snpsift/annmemcreate/main.nf new file mode 100644 index 0000000000..afe45b8dc6 --- /dev/null +++ b/modules/nf-core/snpsift/annmemcreate/main.nf @@ -0,0 +1,38 @@ +process SNPSIFT_ANNMEMCREATE { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a1/a116bb44e388ca83fea78d82fe8bdfd5cf3557254e2ec7dd3f1f17354880638c/data' : + 'community.wave.seqera.io/library/htslib_snpsift:ace461dff1cfc121' }" + + input: + tuple val(meta), path(db_vcf), path(db_vcf_tbi), val(db_fields) + + output: + tuple val(meta), path("*.snpsift.vardb"), emit: database + tuple val("${task.process}"), val('snpsift'), eval("SnpSift -version 2>&1 | grep -oE '[0-9]+\\.[0-9]+[a-z]?'"), emit: versions_snpsift, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def fields = db_fields instanceof List ? db_fields.join(',') : db_fields + + """ + SnpSift \\ + annmem \\ + -create \\ + ${args} \\ + -dbfile ${db_vcf} \\ + ${fields ? "-fields ${fields}" : ""} + """ + + stub: + """ + mkdir -p ${db_vcf}.snpsift.vardb + touch ${db_vcf}.snpsift.vardb/chr1.snpsift.df + """ +} diff --git a/modules/nf-core/snpsift/annmemcreate/meta.yml b/modules/nf-core/snpsift/annmemcreate/meta.yml new file mode 100644 index 0000000000..896ed55191 --- /dev/null +++ b/modules/nf-core/snpsift/annmemcreate/meta.yml @@ -0,0 +1,77 @@ +name: "snpsift_annmemcreate" +description: | + Create memory-optimized SnpSift vardb databases from VCF files for use with SnpSift annMem annotation. 
+ Converts VCF files (e.g. ClinVar, dbSNP, Cosmic) into indexed dataframes for fast lookup. +keywords: + - vcf + - annotation + - snpsift + - variant + - database +tools: + - snpsift: + description: "Genetic variant annotations and functional effect prediction toolbox" + homepage: "https://pcingola.github.io/SnpEff/snpsift/annotate_mem/" + documentation: "https://pcingola.github.io/SnpEff/snpsift/annotate_mem/" + tool_dev_url: "https://github.com/pcingola/SnpSift" + doi: "10.3389/fgene.2012.00035" + licence: + - "MIT" + identifier: biotools:snpsift +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - db_vcf: + type: file + description: VCF file to build the database from + pattern: "*.{vcf,vcf.gz}" + ontologies: [] + - db_vcf_tbi: + type: file + description: Tabix index for the database VCF file (optional) + pattern: "*.tbi" + ontologies: [] + - db_fields: + type: list + description: | + INFO field names to include in the database. + Can be a list or a single string. +output: + database: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test' ]` + - "*.snpsift.vardb": + type: directory + description: SnpSift vardb directory containing indexed dataframes + pattern: "*.snpsift.vardb" + versions_snpsift: + - - ${task.process}: + type: string + description: The name of the process + - snpsift: + type: string + description: The name of the tool + - SnpSift -version 2>&1 | grep -oE '[0-9]+\.[0-9]+[a-z]?': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - snpsift: + type: string + description: The tool name + - SnpSift -version 2>&1 | grep -oE '[0-9]+\.[0-9]+[a-z]?': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@friederike-hanssen" +maintainers: + - "@friederike-hanssen" diff --git a/modules/nf-core/spring/decompress/environment.yml b/modules/nf-core/spring/decompress/environment.yml new file mode 100644 index 0000000000..2e0329f7b4 --- /dev/null +++ b/modules/nf-core/spring/decompress/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::spring=1.1.1 diff --git a/modules/nf-core/spring/decompress/main.nf b/modules/nf-core/spring/decompress/main.nf new file mode 100644 index 0000000000..86ced26906 --- /dev/null +++ b/modules/nf-core/spring/decompress/main.nf @@ -0,0 +1,54 @@ +process SPRING_DECOMPRESS { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/spring:1.1.1--h4ac6f70_2' : + 'biocontainers/spring:1.1.1--h4ac6f70_2' }" + + input: + tuple val(meta), path(spring) + val(write_one_fastq_gz) + + output: + tuple val(meta), path("*.fastq.gz"), emit: fastq + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.1.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def output = write_one_fastq_gz ? "-o ${prefix}.fastq.gz" : "-o ${prefix}_R1.fastq.gz ${prefix}_R2.fastq.gz" + + """ + spring \\ + -d \\ + -g \\ + -t ${task.cpus} \\ + $args \\ + -i ${spring} \\ + ${output} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + spring: ${VERSION} + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.1.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def output = write_one_fastq_gz ? "echo '' | gzip > ${prefix}.fastq.gz" : "echo '' | gzip > ${prefix}_R1.fastq.gz; echo '' | gzip > ${prefix}_R2.fastq.gz" + """ + ${output} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + spring: ${VERSION} + END_VERSIONS + """ +} diff --git a/modules/nf-core/spring/decompress/meta.yml b/modules/nf-core/spring/decompress/meta.yml new file mode 100644 index 0000000000..72b72b75da --- /dev/null +++ b/modules/nf-core/spring/decompress/meta.yml @@ -0,0 +1,51 @@ +name: "spring_decompress" +description: Fast, efficient, lossless decompression of FASTQ files. 
+keywords: + - FASTQ + - decompression + - lossless +tools: + - "spring": + description: "SPRING is a compression tool for Fastq files (containing up to 4.29 + Billion reads)" + homepage: "https://github.com/shubhamchandak94/Spring" + documentation: "https://github.com/shubhamchandak94/Spring/blob/master/README.md" + tool_dev_url: "https://github.com/shubhamchandak94/Spring" + doi: "10.1093/bioinformatics/bty1015" + licence: ["Free for non-commercial use"] + identifier: biotools:spring +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - spring: + type: file + description: Spring file to decompress. + pattern: "*.{spring}" + - - write_one_fastq_gz: + type: boolean + description: | + Controls whether spring should write one fastq.gz file with reads from both directions or two fastq.gz files with reads from distinct directions + pattern: "true or false" +output: + - fastq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastq.gz": + type: file + description: Decompressed FASTQ file(s). 
+ pattern: "*.{fastq.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@xec-cm" +maintainers: + - "@xec-cm" diff --git a/modules/nf-core/strelka/germline/environment.yml b/modules/nf-core/strelka/germline/environment.yml new file mode 100644 index 0000000000..ad1ff8498d --- /dev/null +++ b/modules/nf-core/strelka/germline/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::python=2.7.15 + - bioconda::strelka=2.9.10=h9ee0642_1 diff --git a/modules/nf-core/strelka/germline/main.nf b/modules/nf-core/strelka/germline/main.nf new file mode 100644 index 0000000000..00499135c0 --- /dev/null +++ b/modules/nf-core/strelka/germline/main.nf @@ -0,0 +1,65 @@ +process STRELKA_GERMLINE { + tag "${meta.id}" + label 'process_medium' + label 'error_retry' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/strelka:2.9.10--h9ee0642_1' + : 'biocontainers/strelka:2.9.10--h9ee0642_1'}" + + input: + tuple val(meta), path(input), path(input_index), path(target_bed), path(target_bed_index) + path fasta + path fai + + output: + tuple val(meta), path("*variants.vcf.gz"), emit: vcf + tuple val(meta), path("*variants.vcf.gz.tbi"), emit: vcf_tbi + tuple val(meta), path("*genome.vcf.gz"), emit: genome_vcf + tuple val(meta), path("*genome.vcf.gz.tbi"), emit: genome_vcf_tbi + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def regions = target_bed ?
"--callRegions ${target_bed}" : "" + """ + configureStrelkaGermlineWorkflow.py \\ + --bam ${input} \\ + --referenceFasta ${fasta} \\ + ${regions} \\ + ${args} \\ + --runDir strelka + + sed -i s/"isEmail = isLocalSmtp()"/"isEmail = False"/g strelka/runWorkflow.py + + python strelka/runWorkflow.py -m local -j ${task.cpus} + mv strelka/results/variants/genome.*.vcf.gz ${prefix}.genome.vcf.gz + mv strelka/results/variants/genome.*.vcf.gz.tbi ${prefix}.genome.vcf.gz.tbi + mv strelka/results/variants/variants.vcf.gz ${prefix}.variants.vcf.gz + mv strelka/results/variants/variants.vcf.gz.tbi ${prefix}.variants.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + strelka: \$( configureStrelkaGermlineWorkflow.py --version ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.genome.vcf.gz + touch ${prefix}.genome.vcf.gz.tbi + echo "" | gzip > ${prefix}.variants.vcf.gz + touch ${prefix}.variants.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + strelka: \$( configureStrelkaGermlineWorkflow.py --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/strelka/germline/meta.yml b/modules/nf-core/strelka/germline/meta.yml new file mode 100644 index 0000000000..c20bf89a6a --- /dev/null +++ b/modules/nf-core/strelka/germline/meta.yml @@ -0,0 +1,112 @@ +name: strelka_germline +description: Strelka2 is a fast and accurate small variant caller optimized for analysis + of germline variation +keywords: + - variantcalling + - germline + - wgs + - vcf + - variants +tools: + - strelka: + description: Strelka calls somatic and germline small variants from mapped sequencing + reads + homepage: https://github.com/Illumina/strelka + documentation: https://github.com/Illumina/strelka/blob/v2.9.x/docs/userGuide/README.md + tool_dev_url: https://github.com/Illumina/strelka + doi: 10.1038/s41592-018-0051-x + licence: ["GPL v3"] + identifier: biotools:strelka +input: + - - meta: + type:
map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - input: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram}" + ontologies: [] + - input_index: + type: file + description: BAI/CRAI index file + pattern: "*.{bai,crai}" + ontologies: [] + - target_bed: + type: file + description: BED file containing target regions for variant calling + pattern: "*.{bed}" + ontologies: [] + - target_bed_index: + type: file + description: Index for BED file containing target regions for variant calling + pattern: "*.{bed.tbi}" + ontologies: [] + - fasta: + type: file + description: Genome reference FASTA file + pattern: "*.{fa,fasta}" + ontologies: [] + - fai: + type: file + description: Genome reference FASTA index file + pattern: "*.{fa.fai,fasta.fai}" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - "*variants.vcf.gz": + type: file + description: gzipped germline variant file + pattern: "*.{vcf.gz}" + ontologies: [] + vcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - "*variants.vcf.gz.tbi": + type: file + description: index file for the vcf file + pattern: "*.vcf.gz.tbi" + ontologies: [] + genome_vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - "*genome.vcf.gz": + type: file + description: variant records and compressed non-variant blocks + pattern: "*genome.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + genome_vcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test'] + - "*genome.vcf.gz.tbi": + type: file + description: index file for the genome_vcf file + pattern: "*genome.vcf.gz.tbi" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@arontommi" +maintainers: + - "@arontommi" diff --git a/modules/nf-core/strelka/somatic/environment.yml b/modules/nf-core/strelka/somatic/environment.yml new file mode 100644 index 0000000000..ad1ff8498d --- /dev/null +++ b/modules/nf-core/strelka/somatic/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::python=2.7.15 + - bioconda::strelka=2.9.10=h9ee0642_1 diff --git a/modules/nf-core/strelka/somatic/main.nf b/modules/nf-core/strelka/somatic/main.nf new file mode 100644 index 0000000000..052cfee15f --- /dev/null +++ b/modules/nf-core/strelka/somatic/main.nf @@ -0,0 +1,68 @@ +process STRELKA_SOMATIC { + tag "${meta.id}" + label 'process_medium' + label 'error_retry' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ?
'https://depot.galaxyproject.org/singularity/strelka:2.9.10--h9ee0642_1' + : 'biocontainers/strelka:2.9.10--h9ee0642_1'}" + + input: + tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor), path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi), path(target_bed), path(target_bed_index) + path fasta + path fai + + output: + tuple val(meta), path("*.somatic_indels.vcf.gz"), emit: vcf_indels + tuple val(meta), path("*.somatic_indels.vcf.gz.tbi"), emit: vcf_indels_tbi + tuple val(meta), path("*.somatic_snvs.vcf.gz"), emit: vcf_snvs + tuple val(meta), path("*.somatic_snvs.vcf.gz.tbi"), emit: vcf_snvs_tbi + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def options_target_bed = target_bed ? "--callRegions ${target_bed}" : "" + def options_manta = manta_candidate_small_indels ? "--indelCandidates ${manta_candidate_small_indels}" : "" + """ + configureStrelkaSomaticWorkflow.py \\ + --tumor ${input_tumor} \\ + --normal ${input_normal} \\ + --referenceFasta ${fasta} \\ + ${options_target_bed} \\ + ${options_manta} \\ + ${args} \\ + --runDir strelka + + sed -i s/"isEmail = isLocalSmtp()"/"isEmail = False"/g strelka/runWorkflow.py + + python strelka/runWorkflow.py -m local -j ${task.cpus} + mv strelka/results/variants/somatic.indels.vcf.gz ${prefix}.somatic_indels.vcf.gz + mv strelka/results/variants/somatic.indels.vcf.gz.tbi ${prefix}.somatic_indels.vcf.gz.tbi + mv strelka/results/variants/somatic.snvs.vcf.gz ${prefix}.somatic_snvs.vcf.gz + mv strelka/results/variants/somatic.snvs.vcf.gz.tbi ${prefix}.somatic_snvs.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + strelka: \$( configureStrelkaSomaticWorkflow.py --version ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > 
${prefix}.somatic_indels.vcf.gz + touch ${prefix}.somatic_indels.vcf.gz.tbi + echo "" | gzip > ${prefix}.somatic_snvs.vcf.gz + touch ${prefix}.somatic_snvs.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + strelka: \$( configureStrelkaSomaticWorkflow.py --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/strelka/somatic/meta.yml b/modules/nf-core/strelka/somatic/meta.yml new file mode 100644 index 0000000000..eca9ea43f2 --- /dev/null +++ b/modules/nf-core/strelka/somatic/meta.yml @@ -0,0 +1,132 @@ +name: strelka_somatic +description: Strelka2 is a fast and accurate small variant caller optimized for analysis + of germline variation in small cohorts and somatic variation in tumor/normal sample + pairs +keywords: + - variant calling + - germline + - wgs + - vcf + - variants +tools: + - strelka: + description: Strelka calls somatic and germline small variants from mapped sequencing + reads + homepage: https://github.com/Illumina/strelka + documentation: https://github.com/Illumina/strelka/blob/v2.9.x/docs/userGuide/README.md + tool_dev_url: https://github.com/Illumina/strelka + doi: 10.1038/s41592-018-0051-x + licence: ["GPL v3"] + identifier: biotools:strelka +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input_normal: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - input_index_normal: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + - input_tumor: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - input_index_tumor: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + - manta_candidate_small_indels: + type: file + description: VCF.gz file + pattern: "*.{vcf.gz}" + ontologies: [] + - manta_candidate_small_indels_tbi: + type: file + description: VCF.gz index file + pattern: "*.tbi" + ontologies: [] + - target_bed: + type: file + description: BED file containing target regions for variant calling + pattern: "*.{bed}" + ontologies: [] + - target_bed_index: + type: file + description: Index for BED file containing target regions for variant calling + pattern: "*.{bed.tbi}" + ontologies: [] + - fasta: + type: file + description: Genome reference FASTA file + pattern: "*.{fa,fasta}" + ontologies: [] + - fai: + type: file + description: Genome reference FASTA index file + pattern: "*.{fa.fai,fasta.fai}" + ontologies: [] +output: + vcf_indels: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.somatic_indels.vcf.gz": + type: file + description: Gzipped VCF file containing variants + pattern: "*.{vcf.gz}" + ontologies: [] + vcf_indels_tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.somatic_indels.vcf.gz.tbi": + type: file + description: Index for gzipped VCF file containing variants + pattern: "*.{vcf.gz.tbi}" + ontologies: [] + vcf_snvs: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.somatic_snvs.vcf.gz": + type: file + description: Gzipped VCF file containing variants + pattern: "*.{vcf.gz}" + ontologies: [] + vcf_snvs_tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.somatic_snvs.vcf.gz.tbi": + type: file + description: Index for gzipped VCF file containing variants + pattern: "*.{vcf.gz.tbi}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/svdb/merge/environment.yml b/modules/nf-core/svdb/merge/environment.yml new file mode 100644 index 0000000000..dc587136eb --- /dev/null +++ b/modules/nf-core/svdb/merge/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + - bcftools=1.21 + - svdb=2.8.2 diff --git a/modules/nf-core/svdb/merge/main.nf b/modules/nf-core/svdb/merge/main.nf new file mode 100644 index 0000000000..104f5ad799 --- /dev/null +++ b/modules/nf-core/svdb/merge/main.nf @@ -0,0 +1,105 @@ +process SVDB_MERGE { + tag "$meta.id" + label 'process_single' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-375a758a4ca8c128fb9d38047a68a9f4322d2acd:b3615e06ef17566f2988a215ce9e10808c1d08bf-0': + 'biocontainers/mulled-v2-375a758a4ca8c128fb9d38047a68a9f4322d2acd:b3615e06ef17566f2988a215ce9e10808c1d08bf-0' }" + + input: + tuple val(meta), path(vcfs) + val(input_priority) + val(sort_inputs) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + // Ensure priority list matches the number of VCFs if priority is provided + if (input_priority && vcfs.collect().size() != input_priority.collect().size()) { + error "If priority is used, one tag per VCF is needed" + } + + def input = "" + def prio = "" + if (input_priority) { + if (vcfs.collect().size() > 1 && sort_inputs) { + // make vcf-priority pairs and sort on VCF name, so priority is also sorted the same + def pairs = vcfs.indices.collect { [vcfs[it], input_priority[it]] } + pairs = pairs.sort { a, b -> a[0].name <=> b[0].name } + vcfs = pairs.collect { it[0] } + priority = pairs.collect { it[1] } + } else { + priority = input_priority + } + + // Build inputs + prio = "--priority ${input_priority.join(',')}" + input = vcfs + .withIndex() + .collect { vcf, index -> "${vcf}:${priority[index]}" } + .join(" ") + + } else { + // if there's no priority input just sort the vcfs by name if possible + input = (vcfs.collect().size() > 1 && sort_inputs) ? vcfs.sort { it.name } : vcfs + } + + def extension = args2.contains("--output-type b") || args2.contains("-Ob") ? "bcf.gz" : + args2.contains("--output-type u") || args2.contains("-Ou") ? "bcf" : + args2.contains("--output-type z") || args2.contains("-Oz") ? 
"vcf.gz" : + args2.contains("--output-type v") || args2.contains("-Ov") ? "vcf" : + "vcf.gz" + """ + svdb \\ + --merge \\ + $args \\ + $prio \\ + --vcf $input |\\ + bcftools view \\ + $args2 \\ + --threads ${task.cpus} \\ + --output ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + svdb: \$( echo \$(svdb) | head -1 | sed 's/usage: SVDB-\\([0-9]\\.[0-9]\\.[0-9]\\).*/\\1/' ) + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def args2 = task.ext.args2 ?: '' + def extension = args2.contains("--output-type b") || args2.contains("-Ob") ? "bcf.gz" : + args2.contains("--output-type u") || args2.contains("-Ou") ? "bcf" : + args2.contains("--output-type z") || args2.contains("-Oz") ? "vcf.gz" : + args2.contains("--output-type v") || args2.contains("-Ov") ? "vcf" : + "vcf.gz" + def index_type = args2.contains("--write-index=tbi") || args2.contains("-W=tbi") ? "tbi" : + args2.contains("--write-index=csi") || args2.contains("-W=csi") ? "csi" : + args2.contains("--write-index") || args2.contains("-W") ? "csi" : + "" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index_type.matches("csi|tbi") ? 
"touch ${prefix}.${extension}.${index_type}" : "" + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + svdb: \$( echo \$(svdb) | head -1 | sed 's/usage: SVDB-\\([0-9]\\.[0-9]\\.[0-9]\\).*/\\1/' ) + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/svdb/merge/meta.yml b/modules/nf-core/svdb/merge/meta.yml new file mode 100644 index 0000000000..c34a9cb157 --- /dev/null +++ b/modules/nf-core/svdb/merge/meta.yml @@ -0,0 +1,77 @@ +name: svdb_merge +description: The merge module merges structural variants within one or more vcf files. +keywords: + - structural variants + - vcf + - merge +tools: + - svdb: + description: structural variant database software + homepage: https://github.com/J35P312/SVDB + documentation: https://github.com/J35P312/SVDB/blob/master/README.md + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - vcfs: + type: list + description: | + One or more VCF files. The order and number of files should correspond to + the order and number of tags in the `priority` input channel. + pattern: "*.{vcf,vcf.gz}" + - - input_priority: + type: list + description: | + Prioritize the input VCF files according to this list, + e.g ['tiddit','cnvnator']. The order and number of tags should correspond to + the order and number of VCFs in the `vcfs` input channel. + - - sort_inputs: + type: boolean + description: | + Should the input files be sorted by name. The priority tag will be sorted + together with it's corresponding VCF file. +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: VCF output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" + - "@fellen31" diff --git a/modules/nf-core/tabix/bgziptabix/environment.yml b/modules/nf-core/tabix/bgziptabix/environment.yml new file mode 100644 index 0000000000..771b138707 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.21 diff --git a/modules/nf-core/tabix/bgziptabix/main.nf b/modules/nf-core/tabix/bgziptabix/main.nf new file mode 100644 index 0000000000..30eae745fc --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/main.nf @@ -0,0 +1,40 @@ +process TABIX_BGZIPTABIX { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/92/92859404d861ae01afb87e2b789aebc71c0ab546397af890c7df74e4ee22c8dd/data' : + 'community.wave.seqera.io/library/htslib:1.21--ff8e28a189fbecaa' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.gz"), path("*.{tbi,csi}"), emit: gz_index + tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'") , topic: versions , emit: versions_tabix + tuple val("${task.process}"), val('bgzip'), eval("bgzip --version | sed '1!d;s/.* //'"), topic: versions, emit: versions_bgzip + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + bgzip --threads ${task.cpus} -c $args $input > ${prefix}.${input.getExtension()}.gz + tabix --threads ${task.cpus} $args2 ${prefix}.${input.getExtension()}.gz + + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def args2 = task.ext.args2 ?: '' + def index = args2.contains("-C ") || args2.contains("--csi") ? "csi" : "tbi" + """ + echo "" | gzip > ${prefix}.${input.getExtension()}.gz + touch ${prefix}.${input.getExtension()}.gz.${index} + + """ +} diff --git a/modules/nf-core/tabix/bgziptabix/meta.yml b/modules/nf-core/tabix/bgziptabix/meta.yml new file mode 100644 index 0000000000..2a3078c55f --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/meta.yml @@ -0,0 +1,93 @@ +name: tabix_bgziptabix +description: bgzip a sorted tab-delimited genome file and then create tabix + index +keywords: + - bgzip + - compress + - index + - tabix + - vcf +tools: + - tabix: + description: Generic indexer for TAB-delimited genome position files. 
+ homepage: https://www.htslib.org/doc/tabix.html + documentation: https://www.htslib.org/doc/tabix.1.html + doi: 10.1093/bioinformatics/btq671 + licence: ["MIT"] + identifier: biotools:tabix +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: Sorted tab-delimited genome file + ontologies: [] +output: + gz_index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.gz": + type: file + description: bgzipped tab-delimited genome file + pattern: "*.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - "*.{tbi,csi}": + type: file + description: Tabix index file (either tbi or csi) + pattern: "*.{tbi,csi}" + ontologies: [] + versions_tabix: + - - ${task.process}: + type: string + description: The process the versions were collected from + - tabix: + type: string + description: The tool name + - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+': + type: eval + description: The expression to obtain the version of the tool + versions_bgzip: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bgzip: + type: string + description: The tool name + - bgzip --version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - tabix: + type: string + description: The tool name + - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+': + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The process the versions were collected from + - bgzip: + type: string + description: The tool name + - bgzip --version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the 
version of the tool + +authors: + - "@maxulysse" + - "@DLBPointon" +maintainers: + - "@maxulysse" + - "@DLBPointon" diff --git a/modules/nf-core/tabix/tabix/environment.yml b/modules/nf-core/tabix/tabix/environment.yml new file mode 100644 index 0000000000..771b138707 --- /dev/null +++ b/modules/nf-core/tabix/tabix/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.21 diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf new file mode 100644 index 0000000000..325b8bbff8 --- /dev/null +++ b/modules/nf-core/tabix/tabix/main.nf @@ -0,0 +1,45 @@ +process TABIX_TABIX { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/92/92859404d861ae01afb87e2b789aebc71c0ab546397af890c7df74e4ee22c8dd/data' : + 'community.wave.seqera.io/library/htslib:1.21--ff8e28a189fbecaa' }" + + input: + tuple val(meta), path(tab) + + output: + tuple val(meta), path("*.tbi"), optional:true, emit: tbi + tuple val(meta), path("*.csi"), optional:true, emit: csi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + tabix \\ + --threads $task.cpus \\ + $args \\ + $tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${tab}.tbi + touch ${tab}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tabix/tabix/meta.yml 
b/modules/nf-core/tabix/tabix/meta.yml new file mode 100644 index 0000000000..c172968d87 --- /dev/null +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -0,0 +1,63 @@ +name: tabix_tabix +description: create tabix index from a sorted bgzip tab-delimited genome file +keywords: + - index + - tabix + - vcf +tools: + - tabix: + description: Generic indexer for TAB-delimited genome position files. + homepage: https://www.htslib.org/doc/tabix.html + documentation: https://www.htslib.org/doc/tabix.1.html + doi: 10.1093/bioinformatics/btq671 + licence: ["MIT"] + identifier: biotools:tabix +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tab: + type: file + description: TAB-delimited genome position file compressed with bgzip + pattern: "*.{bed.gz,gff.gz,sam.gz,vcf.gz}" + ontologies: [] +output: + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: tabix index file + pattern: "*.{tbi}" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.csi": + type: file + description: coordinate sorted index file + pattern: "*.{csi}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" diff --git a/modules/nf-core/tiddit/sv/environment.yml b/modules/nf-core/tiddit/sv/environment.yml new file mode 100644 index 0000000000..a33b14c85f --- /dev/null +++ b/modules/nf-core/tiddit/sv/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::tiddit=3.6.1 diff --git a/modules/nf-core/tiddit/sv/main.nf b/modules/nf-core/tiddit/sv/main.nf new file mode 100644 index 0000000000..f350e31443 --- /dev/null +++ b/modules/nf-core/tiddit/sv/main.nf @@ -0,0 +1,56 @@ +process TIDDIT_SV { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/tiddit:3.6.1--py38h24c8ff8_0' : + 'biocontainers/tiddit:3.6.1--py38h24c8ff8_0' }" + + input: + tuple val(meta), path(input), path(input_index) + tuple val(meta2), path(fasta) + tuple val(meta3), path(bwa_index) + + output: + tuple val(meta), path("*.vcf") , emit: vcf + tuple val(meta), path("*.ploidies.tab"), emit: ploidy + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def bwa_command = bwa_index ? "[[ -d $bwa_index ]] && for i in $bwa_index/*; do [[ -f $fasta && ! 
\"\$i\" =~ .*\"$fasta.\".* ]] && ln -s \$i ${fasta}.\${i##*.} || ln -s \$i .; done" : "" + + """ + $bwa_command + + tiddit \\ + --sv \\ + $args \\ + --threads $task.cpus \\ + --bam $input \\ + --ref $fasta \\ + -o $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*tiddit-//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf + touch ${prefix}.ploidies.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*tiddit-//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tiddit/sv/meta.yml b/modules/nf-core/tiddit/sv/meta.yml new file mode 100644 index 0000000000..21527baf13 --- /dev/null +++ b/modules/nf-core/tiddit/sv/meta.yml @@ -0,0 +1,76 @@ +name: tiddit_sv +description: Identify chromosomal rearrangements. +keywords: + - structural + - variants + - vcf +tools: + - sv: + description: Search for structural variants. + homepage: https://github.com/SciLifeLab/TIDDIT + documentation: https://github.com/SciLifeLab/TIDDIT/blob/master/README.md + doi: 10.12688/f1000research.11168.1 + licence: ["GPL-3.0-or-later"] + identifier: biotools:tiddit +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAM/CRAM index file + pattern: "*.{bai,crai}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test_fasta']` + - fasta: + type: file + description: Input FASTA file + pattern: "*.{fasta,fa}" + - - meta3: + type: map + description: | + Groovy Map containing sample information from bwa index + e.g. 
`[ id:'test_bwa-index' ]` + - bwa_index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf": + type: file + description: vcf + pattern: "*.{vcf}" + - ploidy: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.ploidies.tab": + type: file + description: tab + pattern: "*.{ploidies.tab}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 0000000000..9b926b1ffa --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,12 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::coreutils=9.5 + - conda-forge::grep=3.11 + - conda-forge::gzip=1.13 + - conda-forge::lbzip2=2.5 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 0000000000..e712ebe63a --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,84 @@ +process UNTAR { + tag "${archive}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/52ccce28d2ab928ab862e25aae26314d69c8e38bd41ca9431c67ef05221348aa/data' + : 'community.wave.seqera.io/library/coreutils_grep_gzip_lbzip2_pruned:838ba80435a629f8'}" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("${prefix}"), emit: untar + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: (meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir ${prefix} + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C ${prefix} --strip-components 1 \\ + -xavf \\ + ${args} \\ + ${archive} \\ + ${args2} + else + tar \\ + -C ${prefix} \\ + -xavf \\ + ${args} \\ + ${archive} \\ + ${args2} + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: (meta.id ? 
"${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir ${prefix} + ## Dry-run untaring the archive to get the files and place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch \${i} + else + mkdir -p \${i} + fi + done + else + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch ${prefix}/\${i} + else + mkdir -p ${prefix}/\${i} + fi + done + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml new file mode 100644 index 0000000000..1b6bf491e6 --- /dev/null +++ b/modules/nf-core/untar/meta.yml @@ -0,0 +1,57 @@ +name: untar +description: Extract files. +keywords: + - untar + - uncompress + - extract +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" + ontologies: + - edam: http://edamontology.org/format_3981 # TAR format + - edam: http://edamontology.org/format_3989 # GZIP format +output: + untar: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*/" + - ${prefix}: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + pattern: "*/" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/unzip/environment.yml b/modules/nf-core/unzip/environment.yml new file mode 100644 index 0000000000..2461589539 --- /dev/null +++ b/modules/nf-core/unzip/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::p7zip=16.02 diff --git a/modules/nf-core/unzip/main.nf b/modules/nf-core/unzip/main.nf new file mode 100644 index 0000000000..a0c02109cd --- /dev/null +++ b/modules/nf-core/unzip/main.nf @@ -0,0 +1,49 @@ +process UNZIP { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/p7zip:16.02' : + 'biocontainers/p7zip:16.02' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("${prefix}/"), emit: unzipped_archive + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + if ( archive instanceof List && archive.name.size > 1 ) { error "[UNZIP] error: 7za only accepts a single archive as input. Please check module input." } + prefix = task.ext.prefix ?: ( meta.id ? 
"${meta.id}" : archive.baseName) + """ + 7za \\ + x \\ + -o"${prefix}"/ \\ + $args \\ + $archive + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + 7za: \$(echo \$(7za --help) | sed 's/.*p7zip Version //; s/(.*//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + if ( archive instanceof List && archive.name.size > 1 ) { error "[UNZIP] error: 7za only accepts a single archive as input. Please check module input." } + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName) + """ + mkdir "${prefix}" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + 7za: \$(echo \$(7za --help) | sed 's/.*p7zip Version //; s/(.*//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/unzip/meta.yml b/modules/nf-core/unzip/meta.yml new file mode 100644 index 0000000000..ba1eb9129b --- /dev/null +++ b/modules/nf-core/unzip/meta.yml @@ -0,0 +1,50 @@ +name: unzip +description: Unzip ZIP archive files +keywords: + - unzip + - decompression + - zip + - archiving +tools: + - unzip: + description: p7zip is a quick port of 7z.exe and 7za.exe (command line version + of 7zip, see www.7-zip.org) for Unix. + homepage: https://sourceforge.net/projects/p7zip/ + documentation: https://sourceforge.net/projects/p7zip/ + tool_dev_url: https://sourceforge.net/projects/p7zip" + licence: ["LGPL-2.1-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: ZIP file + pattern: "*.zip" + ontologies: + - edam: http://edamontology.org/format_3987 # ZIP format +output: + unzipped_archive: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}/: + type: directory + description: Directory contents of the unzipped archive + pattern: "${archive.baseName}/" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/varlociraptor/callvariants/environment.yml b/modules/nf-core/varlociraptor/callvariants/environment.yml new file mode 100644 index 0000000000..4cfd4a7d5a --- /dev/null +++ b/modules/nf-core/varlociraptor/callvariants/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/varlociraptor + - bioconda::varlociraptor=8.9.3 diff --git a/modules/nf-core/varlociraptor/callvariants/main.nf b/modules/nf-core/varlociraptor/callvariants/main.nf new file mode 100644 index 0000000000..fc24cb5ab4 --- /dev/null +++ b/modules/nf-core/varlociraptor/callvariants/main.nf @@ -0,0 +1,39 @@ +process VARLOCIRAPTOR_CALLVARIANTS { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9a/9ac0825c21b2cbaf9535ffe443e53a0bb4d61596cafcb5a5b444dfb31b945ab2/data' + : 'community.wave.seqera.io/library/varlociraptor:8.9.3--fa2ce5da2782669c'}" + + input: + tuple val(meta), path(vcfs), path(scenario), val(scenario_aliases) + + output: + tuple val(meta), path("*.bcf"), emit: bcf + tuple val("${task.process}"), val('varlociraptor'), eval("varlociraptor --version | sed 's/^varlociraptor //'"), topic: versions, emit: versions_varlociraptor + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_called" + + //If we use a scenario file and if there is more than 1 vcf, then collect scenario_aliases and vcfs to scenario_alias_0=vcf_0 scenario_alias_1=vcf_1, etc + //If we use a scenario file and if there is exactly 1 vcf, then scenario_alias=vcf + def scenario_samples = vcfs instanceof List && vcfs.size() > 1 ? [scenario_aliases, vcfs].transpose().collect { files -> "${files[0]}=${files[1]}" }.join(' ') : "${scenario_aliases}=${vcfs}" + """ + varlociraptor call variants \\ + --output ${prefix}.bcf \\ + generic --scenario ${scenario} --obs ${scenario_samples} \\ + ${args} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}_called" + """ + touch ${prefix}.bcf + """ +} diff --git a/modules/nf-core/varlociraptor/callvariants/meta.yml b/modules/nf-core/varlociraptor/callvariants/meta.yml new file mode 100644 index 0000000000..30785a580a --- /dev/null +++ b/modules/nf-core/varlociraptor/callvariants/meta.yml @@ -0,0 +1,79 @@ +name: "varlociraptor_callvariants" +description: Call variants for a given scenario specified with the varlociraptor calling + grammar, preprocessed by varlociraptor preprocessing +keywords: + - observations + - variants + - calling +tools: + - "varlociraptor": + description: "Flexible, uncertainty-aware variant calling with parameter free + filtration via FDR control." 
+ homepage: "https://varlociraptor.github.io/docs/estimating/" + documentation: "https://varlociraptor.github.io/docs/calling/" + tool_dev_url: "https://github.com/varlociraptor/varlociraptor" + doi: "10.1186/s13059-020-01993-6" + licence: ["GPL v3"] + identifier: biotools:varlociraptor +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcfs: + type: file + description: Sorted VCF/BCF file containing sample observations, Can also be + a list of files + pattern: "*.{vcf,bcf,vcf.gz}" + ontologies: + - edam: http://edamontology.org/format_3016 # VCF + - edam: http://edamontology.org/format_3020 # BCF + - scenario: + type: file + description: Yaml file containing scenario information (optional) + pattern: "*.{yml,yaml}" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + - scenario_aliases: + type: list + description: List of aliases for the scenario (optional) +output: + bcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.bcf": + type: file + description: BCF file containing sample observations + pattern: "*.bcf" + ontologies: + - edam: http://edamontology.org/format_3020 # BCF + versions_varlociraptor: + - - ${task.process}: + type: string + description: The process the versions were collected from + - varlociraptor: + type: string + description: The tool name + - "varlociraptor --version | sed 's/^varlociraptor //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - varlociraptor: + type: string + description: The tool name + - "varlociraptor --version | sed 's/^varlociraptor //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" + - "@famosab" diff --git a/modules/nf-core/varlociraptor/estimatealignmentproperties/environment.yml b/modules/nf-core/varlociraptor/estimatealignmentproperties/environment.yml new file mode 100644 index 0000000000..4cfd4a7d5a --- /dev/null +++ b/modules/nf-core/varlociraptor/estimatealignmentproperties/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/varlociraptor + - bioconda::varlociraptor=8.9.3 diff --git a/modules/nf-core/varlociraptor/estimatealignmentproperties/main.nf b/modules/nf-core/varlociraptor/estimatealignmentproperties/main.nf new file mode 100644 index 0000000000..ea786c54b9 --- /dev/null +++ b/modules/nf-core/varlociraptor/estimatealignmentproperties/main.nf @@ -0,0 +1,36 @@ +process VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container 
"${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9a/9ac0825c21b2cbaf9535ffe443e53a0bb4d61596cafcb5a5b444dfb31b945ab2/data' + : 'community.wave.seqera.io/library/varlociraptor:8.9.3--fa2ce5da2782669c'}" + + input: + tuple val(meta), path(bam), path(bai), path(fasta), path(fai) + + output: + tuple val(meta), path("*.alignment-properties.json"), emit: alignment_properties_json + tuple val("${task.process}"), val('varlociraptor'), eval("varlociraptor --version | sed 's/^varlociraptor //'"), topic: versions, emit: versions_varlociraptor + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + varlociraptor estimate alignment-properties \\ + ${fasta} \\ + --bams ${bam} \\ + ${args} \\ + > ${prefix}.alignment-properties.json + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.alignment-properties.json + """ +} diff --git a/modules/nf-core/varlociraptor/estimatealignmentproperties/meta.yml b/modules/nf-core/varlociraptor/estimatealignmentproperties/meta.yml new file mode 100644 index 0000000000..f5ba22c6ba --- /dev/null +++ b/modules/nf-core/varlociraptor/estimatealignmentproperties/meta.yml @@ -0,0 +1,86 @@ +name: "varlociraptor_estimatealignmentproperties" +description: In order to judge about candidate indel and structural variants, Varlociraptor + needs to know about certain properties of the underlying sequencing experiment in + combination with the used read aligner. +keywords: + - estimation + - alignment + - variants +tools: + - "varlociraptor": + description: "Flexible, uncertainty-aware variant calling with parameter free + filtration via FDR control." 
+ homepage: "https://varlociraptor.github.io/docs/estimating/" + documentation: "https://varlociraptor.github.io/docs/estimating/" + tool_dev_url: "https://github.com/varlociraptor/varlociraptor" + doi: "10.1186/s13059-020-01993-6" + licence: ["GPL v3"] + identifier: biotools:varlociraptor +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: http://edamontology.org/format_2572 # BAM + - bai: + type: file + description: Index of sorted BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" + ontologies: + - edam: http://edamontology.org/format_3327 # BAI + - fasta: + type: file + description: Reference fasta file + pattern: "*.{fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 # FASTA + - fai: + type: file + description: Index for reference fasta file (must be created with samtools faidx) + pattern: "*.{fai}" + ontologies: [] +output: + alignment_properties_json: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*.alignment-properties.json": + type: file + description: File containing alignment properties + pattern: "*.alignment-properties.json" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON + versions_varlociraptor: + - - ${task.process}: + type: string + description: The process the versions were collected from + - varlociraptor: + type: string + description: The tool name + - "varlociraptor --version | sed 's/^varlociraptor //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - varlociraptor: + type: string + description: The tool name + - "varlociraptor --version | sed 's/^varlociraptor //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" + - "@famosab" diff --git a/modules/nf-core/varlociraptor/preprocess/environment.yml b/modules/nf-core/varlociraptor/preprocess/environment.yml new file mode 100644 index 0000000000..4cfd4a7d5a --- /dev/null +++ b/modules/nf-core/varlociraptor/preprocess/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/varlociraptor + - bioconda::varlociraptor=8.9.3 diff --git a/modules/nf-core/varlociraptor/preprocess/main.nf b/modules/nf-core/varlociraptor/preprocess/main.nf new file mode 100644 index 0000000000..4fabdbfdf1 --- /dev/null +++ b/modules/nf-core/varlociraptor/preprocess/main.nf @@ -0,0 +1,39 @@ +process VARLOCIRAPTOR_PREPROCESS { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9a/9ac0825c21b2cbaf9535ffe443e53a0bb4d61596cafcb5a5b444dfb31b945ab2/data' + : 'community.wave.seqera.io/library/varlociraptor:8.9.3--fa2ce5da2782669c'}" + + input: + tuple val(meta), path(bam), path(bai), path(candidates), path(alignment_json), path(fasta), path(fai) + + output: + tuple val(meta), path("*.bcf"), emit: bcf + tuple val("${task.process}"), val('varlociraptor'), eval("varlociraptor --version | sed 's/^varlociraptor //'"), topic: versions, emit: versions_varlociraptor + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def alignment_properties_json = alignment_json ? "--alignment-properties ${alignment_json}" : "" + """ + varlociraptor preprocess variants \\ + ${fasta} \\ + ${alignment_properties_json} \\ + --bam ${bam} \\ + --candidates ${candidates} \\ + ${args} \\ + --output ${prefix}.bcf + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bcf + """ +} diff --git a/modules/nf-core/varlociraptor/preprocess/meta.yml b/modules/nf-core/varlociraptor/preprocess/meta.yml new file mode 100644 index 0000000000..27edd74a15 --- /dev/null +++ b/modules/nf-core/varlociraptor/preprocess/meta.yml @@ -0,0 +1,98 @@ +name: "varlociraptor_preprocess" +description: Obtains per-sample observations for the actual calling process with varlociraptor + calls +keywords: + - observations + - variants + - preprocessing +tools: + - "varlociraptor": + description: | + Flexible, uncertainty-aware variant calling with parameter free + filtration via FDR control. 
+ homepage: "https://varlociraptor.github.io/docs/estimating/" + documentation: "https://varlociraptor.github.io/docs/calling/" + tool_dev_url: "https://github.com/varlociraptor/varlociraptor" + doi: "10.1186/s13059-020-01993-6" + licence: ["GPL v3"] + identifier: biotools:varlociraptor +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: http://edamontology.org/format_2572 # BAM + - bai: + type: file + description: Index of the BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" + ontologies: + - edam: http://edamontology.org/format_3327 # BAI + - candidates: + type: file + description: Sorted BCF/VCF file + pattern: "*.{bcf,vcf,vcf.gz}" + ontologies: + - edam: http://edamontology.org/format_3016 # VCF + - alignment_json: + type: file + description: File containing alignment properties obtained with varlociraptor/estimatealignmentproperties + pattern: "*.json" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON + - fasta: + type: file + description: Reference fasta file + pattern: "*.{fasta}" + ontologies: + - edam: http://edamontology.org/format_1929 # FASTA + - fai: + type: file + description: Index for reference fasta file (must be created with samtools faidx) + pattern: "*.{fai}" + ontologies: [] +output: + bcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*.bcf": + type: file + description: BCF file containing sample observations + pattern: "*.bcf" + ontologies: + - edam: http://edamontology.org/format_3020 # BCF + versions_varlociraptor: + - - ${task.process}: + type: string + description: The process the versions were collected from + - varlociraptor: + type: string + description: The tool name + - "varlociraptor --version | sed 's/^varlociraptor //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - varlociraptor: + type: string + description: The tool name + - "varlociraptor --version | sed 's/^varlociraptor //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" + - "@famosab" diff --git a/modules/nf-core/vcflib/vcffilter/environment.yml b/modules/nf-core/vcflib/vcffilter/environment.yml new file mode 100644 index 0000000000..b26a11689c --- /dev/null +++ b/modules/nf-core/vcflib/vcffilter/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::vcflib=1.0.14 diff --git a/modules/nf-core/vcflib/vcffilter/main.nf b/modules/nf-core/vcflib/vcffilter/main.nf new file mode 100644 index 0000000000..9a375df3fd --- /dev/null +++ b/modules/nf-core/vcflib/vcffilter/main.nf @@ -0,0 +1,59 @@ +process VCFLIB_VCFFILTER { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/fc/fc33d59c090cef123aca26ae17fbddbd596640304d8325cbd5816229fa2c05ee/data' + : 'community.wave.seqera.io/library/vcflib:1.0.14--cc8ffb2c1a080797'}" + + input: + tuple val(meta), path(vcf), path(tbi) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}.filter" + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '1.0.14' + if (!(args.contains("-f") || args.contains("--info-filter") || args.contains("-g") || args.contains("--genotype-filter"))) { + error("VCFLIB_VCFFILTER requires either the -f/--info-filter or -g/--genotype-filter arguments to be supplied using ext.args.") + } + if ("${vcf}" == "${prefix}.vcf.gz") { + error("Input and output names are the same, set prefix in module configuration to disambiguate!") + } + """ + vcffilter \\ + ${args} \\ + ${vcf} \\ + | bgzip -c ${args2} > ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vcflib: ${VERSION} + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}.filter" + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ def VERSION = '1.0.14' + if ("${vcf}" == "${prefix}.vcf.gz") { + error("Input and output names are the same, set prefix in module configuration to disambiguate!") + } + """ + echo | gzip > ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vcflib: ${VERSION} + END_VERSIONS + """ +} diff --git a/modules/nf-core/vcflib/vcffilter/meta.yml b/modules/nf-core/vcflib/vcffilter/meta.yml new file mode 100644 index 0000000000..3e51ce3531 --- /dev/null +++ b/modules/nf-core/vcflib/vcffilter/meta.yml @@ -0,0 +1,55 @@ +name: "vcflib_vcffilter" +description: Command line tools for parsing and manipulating VCF files. +keywords: + - filter + - variant + - vcf + - quality +tools: + - "vcflib": + description: "Command line tools for parsing and manipulating VCF files." + homepage: "https://github.com/vcflib/vcflib" + documentation: "https://github.com/vcflib/vcflib" + tool_dev_url: "https://github.com/vcflib/vcflib" + doi: "10.1371/journal.pcbi.1009123" + licence: ["MIT"] + identifier: biotools:vcflib +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test_sample_1' ] + - vcf: + type: file + description: VCF file + pattern: "*.{vcf,vcf.gz}" + ontologies: [] + - tbi: + type: file + description: Index file + pattern: "*.{tbi}" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - "*.vcf.gz": + type: file + description: Filtered VCF file + pattern: "*.{vcf.gz}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@zachary-foster" +maintainers: + - "@zachary-foster" diff --git a/modules/nf-core/vcftools/environment.yml b/modules/nf-core/vcftools/environment.yml new file mode 100644 index 0000000000..ff0e9d03cd --- /dev/null +++ b/modules/nf-core/vcftools/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::vcftools=0.1.16 diff --git a/modules/nf-core/modules/vcftools/main.nf b/modules/nf-core/vcftools/main.nf similarity index 74% rename from modules/nf-core/modules/vcftools/main.nf rename to modules/nf-core/vcftools/main.nf index 78b95fae3f..67bca9af0c 100644 --- a/modules/nf-core/modules/vcftools/main.nf +++ b/modules/nf-core/vcftools/main.nf @@ -1,16 +1,16 @@ process VCFTOOLS { tag "$meta.id" - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? "bioconda::vcftools=0.1.16" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/vcftools:0.1.16--he513fc3_4' : - 'quay.io/biocontainers/vcftools:0.1.16--he513fc3_4' }" + 'biocontainers/vcftools:0.1.16--he513fc3_4' }" input: // Owing to the nature of vcftools we here provide solutions to working with optional bed files and optional // alternative variant files, for use with the 'diff' suite of tools. 
- // Other optional input files can be utilised in a similar way to below but we do not exhaustively itterate through all + // Other optional input files can be utilised in a similar way to below but we do not exhaustively iterate through all // possible options. Instead we leave that to the user. tuple val(meta), path(variant_file) path bed @@ -91,10 +91,14 @@ process VCFTOOLS { def bed_arg = (args.contains('--bed')) ? "--bed ${bed}" : (args.contains('--exclude-bed')) ? "--exclude-bed ${bed}" : - (args.contains('--hapcount')) ? "--hapcount ${bed}" : '' + (args.contains('--hapcount')) ? "--hapcount ${bed}" : + (args.contains('--positions')) ? "--positions ${bed}" : + (args.contains('--exclude-positions')) ? "--exclude-positions ${bed}" : '' args_list.removeIf { it.contains('--bed') } args_list.removeIf { it.contains('--exclude-bed') } args_list.removeIf { it.contains('--hapcount') } + args_list.removeIf { it.contains('--positions') } + args_list.removeIf { it.contains('--exclude-positions') } def diff_variant_arg = (args.contains('--diff')) ? "--diff ${diff_variant_file}" : (args.contains('--gzdiff')) ? 
"--gzdiff ${diff_variant_file}" : @@ -120,4 +124,76 @@ process VCFTOOLS { vcftools: \$(echo \$(vcftools --version 2>&1) | sed 's/^.*VCFtools (//;s/).*//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf + touch ${prefix}.bcf + touch ${prefix}.frq + touch ${prefix}.frq.count + touch ${prefix}.idepth + touch ${prefix}.ldepth + touch ${prefix}.ldepth.mean + touch ${prefix}.gdepth + touch ${prefix}.hap.ld + touch ${prefix}.geno.ld + touch ${prefix}.geno.chisq + touch ${prefix}.list.hap.ld + touch ${prefix}.list.geno.ld + touch ${prefix}.interchrom.hap.ld + touch ${prefix}.interchrom.geno.ld + touch ${prefix}.TsTv + touch ${prefix}.TsTv.summary + touch ${prefix}.TsTv.count + touch ${prefix}.TsTv.qual + touch ${prefix}.FILTER.summary + touch ${prefix}.sites.pi + touch ${prefix}.windowed.pi + touch ${prefix}.weir.fst + touch ${prefix}.het + touch ${prefix}.hwe + touch ${prefix}.Tajima.D + touch ${prefix}.ifreqburden + touch ${prefix}.LROH + touch ${prefix}.relatedness + touch ${prefix}.relatedness2 + touch ${prefix}.lqual + touch ${prefix}.imiss + touch ${prefix}.lmiss + touch ${prefix}.snpden + touch ${prefix}.kept.sites + touch ${prefix}.removed.sites + touch ${prefix}.singletons + touch ${prefix}.indel.hist + touch ${prefix}.hapcount + touch ${prefix}.mendel + touch ${prefix}.FORMAT + touch ${prefix}.INFO + touch ${prefix}.012 + touch ${prefix}.012.indv + touch ${prefix}.012.pos + touch ${prefix}.impute.hap + touch ${prefix}.impute.hap.legend + touch ${prefix}.impute.hap.indv + touch ${prefix}.ldhat.sites + touch ${prefix}.ldhat.locs + touch ${prefix}.BEAGLE.GL + touch ${prefix}.BEAGLE.PL + touch ${prefix}.ped + touch ${prefix}.map + touch ${prefix}.tped + touch ${prefix}.tfam + touch ${prefix}.diff.sites_in_files + touch ${prefix}.diff.indv_in_files + touch ${prefix}.diff.sites + touch ${prefix}.diff.indv + touch ${prefix}.diff.discordance.matrix + touch ${prefix}.diff.switch + + cat <<-END_VERSIONS > versions.yml 
+ "${task.process}": + vcftools: \$(echo \$(vcftools --version 2>&1) | sed 's/^.*VCFtools (//;s/).*//') + END_VERSIONS + """ } diff --git a/modules/nf-core/vcftools/meta.yml b/modules/nf-core/vcftools/meta.yml new file mode 100644 index 0000000000..b9bae4df12 --- /dev/null +++ b/modules/nf-core/vcftools/meta.yml @@ -0,0 +1,687 @@ +name: vcftools +description: A set of tools written in Perl and C++ for working with VCF files +keywords: + - VCFtools + - VCF + - sort +tools: + - vcftools: + description: A set of tools written in Perl and C++ for working with VCF files. + This package only contains the C++ libraries whereas the package perl-vcftools-vcf + contains the perl libraries + homepage: http://vcftools.sourceforge.net/ + documentation: http://vcftools.sourceforge.net/man_latest.html + licence: ["LGPL"] + identifier: biotools:vcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - variant_file: + type: file + description: variant input file which can be vcf, vcf.gz, or bcf format. + - - bed: + type: file + description: bed file which can be used with different arguments in vcftools + (optional) + - - diff_variant_file: + type: file + description: secondary variant file which can be used with the 'diff' suite + of tools (optional) +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf": + type: file + description: vcf file (optional) + pattern: "*.vcf" + - bcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bcf": + type: file + description: bcf file (optional) + pattern: "*.bcf" + - frq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.frq": + type: file + description: Allele frequency for each site (optional) + pattern: "*.frq" + - frq_count: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.frq.count": + type: file + description: Allele counts for each site (optional) + pattern: "*.frq.count" + - idepth: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.idepth": + type: file + description: mean depth per individual (optional) + pattern: "*.idepth" + - ldepth: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.ldepth": + type: file + description: depth per site summed across individuals (optional) + pattern: "*.ldepth" + - ldepth_mean: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.ldepth.mean": + type: file + description: mean depth per site calculated across individuals (optional) + pattern: "*.ldepth.mean" + - gdepth: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.gdepth": + type: file + description: depth for each genotype in vcf file (optional) + pattern: "*.gdepth" + - hap_ld: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.hap.ld": + type: file + description: r2, D, and D’ statistics using phased haplotypes (optional) + pattern: "*.hap.ld" + - geno_ld: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*.geno.ld": + type: file + description: squared correlation coefficient between genotypes encoded as 0, + 1 and 2 to represent the number of non-reference alleles in each individual + (optional) + pattern: "*.geno.ld" + - geno_chisq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.geno.chisq": + type: file + description: test for genotype independence via the chi-squared statistic (optional) + pattern: "*.geno.chisq" + - list_hap_ld: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.list.hap.ld": + type: file + description: r2 statistics of the sites contained in the provided input file + versus all other sites (optional) + pattern: "*.list.hap.ld" + - list_geno_ld: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.list.geno.ld": + type: file + description: r2 statistics of the sites contained in the provided input file + versus all other sites (optional) + pattern: "*.list.geno.ld" + - interchrom_hap_ld: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.interchrom.hap.ld": + type: file + description: r2 statistics for sites (haplotypes) on different chromosomes (optional) + pattern: "*.interchrom.hap.ld" + - interchrom_geno_ld: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.interchrom.geno.ld": + type: file + description: r2 statistics for sites (genotypes) on different chromosomes (optional) + pattern: "*.interchrom.geno.ld" + - tstv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*.TsTv": + type: file + description: Transition / Transversion ratio in bins of size defined in options + (optional) + pattern: "*.TsTv" + - tstv_summary: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.TsTv.summary": + type: file + description: Summary of all Transitions and Transversions (optional) + pattern: "*.TsTv.summary" + - tstv_count: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.TsTv.count": + type: file + description: Transition / Transversion ratio as a function of alternative allele + count (optional) + pattern: "*.TsTv.count" + - tstv_qual: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.TsTv.qual": + type: file + description: Transition / Transversion ratio as a function of SNP quality threshold + (optional) + pattern: "*.TsTv.qual" + - filter_summary: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.FILTER.summary": + type: file + description: Summary of the number of SNPs and Ts/Tv ratio for each FILTER category + (optional) + pattern: "*.FILTER.summary" + - sites_pi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.sites.pi": + type: file + description: Nucleotide divergency on a per-site basis (optional) + pattern: "*.sites.pi" + - windowed_pi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.windowed.pi": + type: file + description: Nucleotide diversity in windows, with window size determined by + options (optional) + pattern: "*.windowed.pi" + - weir_fst: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.weir.fst": + type: file + description: Fst estimate from Weir and Cockerham’s 1984 paper (optional) + pattern: "*.weir.fst" + - heterozygosity: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.het": + type: file + description: Heterozygosity on a per-individual basis (optional) + pattern: "*.het" + - hwe: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.hwe": + type: file + description: Contains the Observed numbers of Homozygotes and Heterozygotes + and the corresponding Expected numbers under HWE (optional) + pattern: "*.hwe" + - tajima_d: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.Tajima.D": + type: file + description: Tajima’s D statistic in bins with size of the specified number + in options (optional) + pattern: "*.Tajima.D" + - freq_burden: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.ifreqburden": + type: file + description: Number of variants within each individual of a specific frequency + in options (optional) + pattern: "*.ifreqburden" + - lroh: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.LROH": + type: file + description: Long Runs of Homozygosity (optional) + pattern: "*.LROH" + - relatedness: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*.relatedness": + type: file + description: Relatedness statistic based on the method of Yang et al, Nature + Genetics 2010 (doi:10.1038/ng.608) (optional) + pattern: "*.relatedness" + - relatedness2: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.relatedness2": + type: file + description: Relatedness statistic based on the method of Manichaikul et al., + BIOINFORMATICS 2010 (doi:10.1093/bioinformatics/btq559) (optional) + pattern: "*.relatedness2" + - lqual: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.lqual": + type: file + description: per-site SNP quality (optional) + pattern: "*.lqual" + - missing_individual: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.imiss": + type: file + description: Missingness on a per-individual basis (optional) + pattern: "*.imiss" + - missing_site: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.lmiss": + type: file + description: Missingness on a per-site basis (optional) + pattern: "*.lmiss" + - snp_density: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.snpden": + type: file + description: Number and density of SNPs in bins of size defined by option (optional) + pattern: "*.snpden" + - kept_sites: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.kept.sites": + type: file + description: All sites that have been kept after filtering (optional) + pattern: "*.kept.sites" + - removed_sites: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.removed.sites": + type: file + description: All sites that have been removed after filtering (optional) + pattern: "*.removed.sites" + - singeltons: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.singletons": + type: file + description: Location of singletons, and the individual they occur in (optional) + pattern: "*.singletons" + - indel_hist: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.indel.hist": + type: file + description: Histogram file of the length of all indels (including SNPs) (optional) + pattern: "*.indel.hist" + - hapcount: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.hapcount": + type: file + description: Unique haplotypes within user specified bins (optional) + pattern: "*.hapcount" + - mendel: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.mendel": + type: file + description: Mendel errors identified in trios (optional) + pattern: "*.mendel" + - format: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.FORMAT": + type: file + description: Extracted information from the genotype fields in the VCF file + relating to a specified FORMAT identifier (optional) + pattern: "*.FORMAT" + - info: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.INFO": + type: file + description: Extracted information from the INFO field in the VCF file (optional) + pattern: "*.INFO" + - genotypes_matrix: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*.012": + type: file + description: | + Genotypes output as large matrix. + Genotypes of each individual on a separate line. + Genotypes are represented as 0, 1 and 2, where the number represent that number of non-reference alleles. + Missing genotypes are represented by -1 (optional) + pattern: "*.012" + - genotypes_matrix_individual: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.012.indv": + type: file + description: Details the individuals included in the main genotypes_matrix file + (optional) + pattern: "*.012.indv" + - genotypes_matrix_position: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.012.pos": + type: file + description: Details the site locations included in the main genotypes_matrix + file (optional) + pattern: "*.012.pos" + - impute_hap: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.impute.hap": + type: file + description: Phased haplotypes in IMPUTE reference-panel format (optional) + pattern: "*.impute.hap" + - impute_hap_legend: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.impute.hap.legend": + type: file + description: Impute haplotype legend file (optional) + pattern: "*.impute.hap.legend" + - impute_hap_indv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.impute.hap.indv": + type: file + description: Impute haplotype individuals file (optional) + pattern: "*.impute.hap.indv" + - ldhat_sites: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.ldhat.sites": + type: file + description: Output data in LDhat format, sites (optional) + pattern: "*.ldhat.sites" + - ldhat_locs: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.ldhat.locs": + type: file + description: output data in LDhat format, locations (optional) + pattern: "*.ldhat.locs" + - beagle_gl: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.BEAGLE.GL": + type: file + description: Genotype likelihoods for biallelic sites (optional) + pattern: "*.BEAGLE.GL" + - beagle_pl: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.BEAGLE.PL": + type: file + description: Genotype likelihoods for biallelic sites (optional) + pattern: "*.BEAGLE.PL" + - ped: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.ped": + type: file + description: output the genotype data in PLINK PED format (optional) + pattern: "*.ped" + - map_: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.map": + type: file + description: output the genotype data in PLINK PED format (optional) + pattern: "*.map" + - tped: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tped": + type: file + description: output the genotype data in PLINK PED format (optional) + pattern: "*.tped" + - tfam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.tfam": + type: file + description: output the genotype data in PLINK PED format (optional) + pattern: "*.tfam" + - diff_sites_in_files: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.diff.sites_in_files": + type: file + description: Sites that are common / unique to each file specified in optional + inputs (optional) + pattern: "*.diff.sites_in_files" + - diff_indv_in_files: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.diff.indv_in_files": + type: file + description: Individuals that are common / unique to each file specified in + optional inputs (optional) + pattern: "*.diff.indv_in_files" + - diff_sites: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.diff.sites": + type: file + description: Discordance on a site by site basis, specified in optional inputs + (optional) + pattern: "*.diff.sites" + - diff_indv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.diff.indv": + type: file + description: Discordance on an individual by individual basis, specified in optional + inputs (optional) + pattern: "*.diff.indv" + - diff_discd_matrix: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.diff.discordance.matrix": + type: file + description: Discordance matrix between files specified in optional inputs (optional) + pattern: "*.diff.discordance.matrix" + - diff_switch_error: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*.diff.switch": + type: file + description: Switch errors found between sites (optional) + pattern: "*.diff.switch" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Mark-S-Hill" +maintainers: + - "@Mark-S-Hill" diff --git a/modules/nf-core/yte/environment.yml b/modules/nf-core/yte/environment.yml new file mode 100644 index 0000000000..0537c967ef --- /dev/null +++ b/modules/nf-core/yte/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::yte=1.9.4 diff --git a/modules/nf-core/yte/main.nf b/modules/nf-core/yte/main.nf new file mode 100644 index 0000000000..c0ea25fe1f --- /dev/null +++ b/modules/nf-core/yte/main.nf @@ -0,0 +1,41 @@ +process YTE { + tag "${meta.id}" + label 'process_single' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda "${moduleDir}/environment.yml" + + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/a4/a427620cd214cbc7f48c8d43fa818be59c6c4ea4a622331d96a8b063a05335b0/data' + : 'community.wave.seqera.io/library/yte:1.9.4--2a362f82cd32b54a'}" + + input: + tuple val(meta), path(template), path(map_file), val(map) + + output: + tuple val(meta), path("*.yaml"), emit: rendered + tuple val("${task.process}"), val('yte'), eval("echo $VERSION"), topic: versions, emit: versions_yte + + when: + task.ext.when == null || task.ext.when + + script: + // No args because tool does not accept args, only stdin/stdout + def prefix = task.ext.prefix ?: "${meta.id}" + + // Use map_file if provided, otherwise use map to create key=value pairs for mapping command + def mapping_cmd = map_file ? 
"--variable-file ${map_file}" : "--variables " + map.collect { k, v -> "${k}=${v}" }.join(' ') + VERSION = "1.9.4" + // WARN: Version information not provided by tool on CLI. Please update this string when bumping + """ + yte ${mapping_cmd} < ${template} > ${prefix}.yaml + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + VERSION = "1.9.4" + // WARN: Version information not provided by tool on CLI. Please update this string when bumping + """ + touch ${prefix}.yaml + """ +} diff --git a/modules/nf-core/yte/meta.yml b/modules/nf-core/yte/meta.yml new file mode 100644 index 0000000000..a8350a1e6e --- /dev/null +++ b/modules/nf-core/yte/meta.yml @@ -0,0 +1,77 @@ +name: yte +description: A YAML template engine with Python expressions +keywords: + - yaml + - template + - python +tools: + - yte: + description: "A YAML template engine with Python expressions" + homepage: "https://yte-template-engine.github.io/" + documentation: "https://yte-template-engine.github.io/" + tool_dev_url: "https://github.com/yte-template-engine/yte" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'template1' ]` + - template: + type: file + description: YTE template + pattern: "*.{yaml}" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + - map_file: + type: file + description: YAML file containing a map to be used in the template + pattern: "*.{yaml}" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + - map: + type: map + description: | + Groovy Map containing mapping information to be used in the template + e.g. 
`[ key: value ]` with key being a wildcard in the template + +output: + rendered: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.yaml": + type: file + description: Rendered YAML file + pattern: "*.yaml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + versions_yte: + - - ${task.process}: + type: string + description: The process the versions were collected from + - yte: + type: string + description: The tool name + - echo $VERSION: + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - yte: + type: string + description: The tool name + - echo $VERSION: + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@famosab" +maintainers: + - "@famosab" diff --git a/nextflow.config b/nextflow.config index 96653209d1..fd0425b364 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,7 +1,7 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nf-core/sarek Nextflow config file -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Default config options for all compute environments ---------------------------------------------------------------------------------------- */ @@ -10,185 +10,346 @@ params { // Workflow flags: // Mandatory arguments - input = null // No default input - step = 'mapping' // Starts with mapping + // Input options + input = null // No default input + input_restart = null // No default automatic input + step = 'mapping' // Starts with mapping - // Genome and references options - genome = 'GRCh38' - igenomes_base = 's3://ngi-igenomes/igenomes/' - 
igenomes_ignore = false - genomes_base = null // Disabled by default - save_reference = false // Built references not saved + // References + genome = 'GATK.GRCh38' + igenomes_base = 's3://ngi-igenomes/igenomes/' + snpeff_cache = 's3://annotation-cache/snpeff_cache/' + vep_cache = 's3://annotation-cache/vep_cache/' + igenomes_ignore = false + save_reference = false // Built references not saved + build_only_index = false // Only build the reference indexes + download_cache = false // Do not download annotation cache // Main options - no_intervals = false // Intervals will be built from the fasta file - nucleotides_per_second = 1000 // Default interval size - sentieon = false // Not using Sentieon by default - skip_qc = null // All QC tools are used - target_bed = null // No default TargetBED file for targeted sequencing - tools = null // No default Variant_Calling or Annotation tools - - // Modify fastqs (trim/split) - trim_fastq = false // No trimming - clip_r1 = 0 - clip_r2 = 0 + no_intervals = false // Intervals will be built from the fasta file + nucleotides_per_second = 200000 // Default interval size + tools = null // No default Variant_Calling or Annotation tools + skip_tools = null // All tools (markduplicates + baserecalibrator + QC) are used by default + split_fastq = 50000000 // FASTQ files will not be split by default by FASTP + wes = false // Set to true, if data is exome/targeted sequencing data. 
Used to use correct models in various variant callers + + // Modify fastqs (trim/split) with FASTP + trim_fastq = false // No trimming + clip_r1 = 0 + clip_r2 = 0 three_prime_clip_r1 = 0 three_prime_clip_r2 = 0 - trim_nextseq = 0 - save_trimmed = false - split_fastq = 0 // FASTQ files will not be split by default - save_split_fastqs = false + trim_nextseq = false + length_required = 15 // Default in FastP + save_trimmed = false + save_split_fastqs = false - // UMI tagged reads - umi_read_structure = null // no umi - group_by_umi_strategy = 'Adjacency' // + // UMI handling options + umi_in_read_header = false // No UMI in read header by default + umi_location = null // No UMI location by default + umi_length = null // No UMI length by default + umi_base_skip = null // No UMI base skip by default + umi_tag = null // No UMI tag already inside bam/cram file + umi_read_structure = null // no UMI for fgbio consensus read generation + group_by_umi_strategy = 'Adjacency' // default strategy when running with UMI for GROUPREADSBYUMI - // Preprocessing - aligner = 'bwa-mem' - markdup_java_options = '"-Xms4000m -Xmx7g"' // Established values for markDuplicates memory consumption, see https://github.com/SciLifeLab/Sarek/pull/689 for details - use_gatk_spark = null // GATK Spark implementation of their tools in local mode not used by default - save_bam_mapped = false // Mapped BAMs not saved - skip_markduplicates = false // Do not skip markDuplicates by default - sequencing_center = null // No sequencing center to be written in BAM header in MapReads process + // BBSplit genome filtering + bbsplit_fasta_list = null + save_bbsplit_reads = false + bbsplit_index = null - //BQSR - skip_bqsr = false + // Preprocessing + aligner = 'bwa-mem' // Default is bwa-mem, bwa-mem2 and dragmap can be used too + use_gatk_spark = null // GATK Spark implementation of their tools in local mode not used by default + save_mapped = false // Mapped BAMs not saved + save_output_as_bam = false // Output 
files from preprocessing are saved as bam and not as cram files + seq_center = null // No sequencing center to be written in read group CN field by aligner + seq_platform = 'ILLUMINA' // Default platform written in read group PL field by aligner + markduplicates_pixel_distance = null // Use default value for GATK MarkDuplicates + sentieon_consensus = false // Use consensus read generation inside Sentieon deduplication step // Variant Calling - ascat_ploidy = null // Use default value - ascat_purity = null // Use default value - cf_coeff = 0.05 // default value for Control-FREEC - cf_contamination = null // by default not specified in Control-FREEC - cf_contamination_adjustment = false // by default we are not using this in Control-FREEC - cf_ploidy = 2 // you can use 2,3,4 - cf_window = null // by default we are not using this in Control-FREEC - generate_gvcf = false // g.vcf are not produced by HaplotypeCaller by default - no_strelka_bp = false // Strelka will use Manta candidateSmallIndels if available - pon = null // No default PON (Panel of Normals) file for GATK Mutect2 / Sentieon TNscope - pon_tbi = null // No default PON index for GATK Mutect2 / Sentieon TNscope - ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2 - wes = false // Set to true, if data is exome/targeted sequencing data. 
Used to use correct models in various variant callers - joint_germline = false // + ascat_ploidy = null // default value for ASCAT + ascat_min_base_qual = 20 // default value for ASCAT + ascat_min_counts = 10 // default value for ASCAT + ascat_min_map_qual = 35 // default value for ASCAT + ascat_purity = null // default value for ASCAT + cf_ploidy = "2" // default value for Control-FREEC + cf_coeff = 0.05 // default value for Control-FREEC + cf_contamination = 0 // default value for Control-FREEC + cf_contamination_adjustment = false // by default we are not using this in Control-FREEC + cf_mincov = 0 // ControlFreec default values + cf_minqual = 0 // ControlFreec default values + cf_window = null // by default we are not using this in Control-FREEC + cnvkit_reference = null // by default the reference is build from the fasta file + freebayes_filter = 30 // default filter for freebayes (filtering with vcflib/vcffilter) + gatk_pcr_indel_model = 'CONSERVATIVE' // default value for GATK HaplotypeCaller + ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2 + joint_germline = false // g.vcf & joint germline calling are not run by default if HaplotypeCaller is selected + joint_mutect2 = false // if true, enables patient-wise multi-sample somatic variant calling + only_paired_variant_calling = false // if true, skips germline variant calling for normal-paired sample + sentieon_dnascope_emit_mode = 'variant' // default value for Sentieon dnascope + sentieon_dnascope_pcr_indel_model = 'CONSERVATIVE' + sentieon_haplotyper_emit_mode = 'variant' // default value for Sentieon haplotyper + + // Post variant calling + varlociraptor_chunk_size = 15 // default chunk size for Varlociraptor + varlociraptor_scenario_tumor_only = null // uses default scenario in assets/varlociraptor_tumor_only.yte.yaml + varlociraptor_scenario_somatic = null // uses default scenario in assets/varlociraptor_somatic.yte.yaml + varlociraptor_scenario_germline = null // 
uses default scenario in assets/varlociraptor_germline.yte.yaml + bcftools_filter_criteria = "-f PASS,." // Filters for VCFs with PASS, some variantcallers just give a . + concatenate_vcfs = false // by default we don't concatenate the germline-vcf-files + filter_vcfs = false // enables filtering of all VCFs with bcftools filter + snv_consensus_calling = false // enables consensus calling of small variant VCF files across different callers for the same sample + consensus_min_count = 2 // minimum number of files that must have variant + normalize_vcfs = false // by default we don't normalize the vcf-files // Annotation - annotate_tools = null // Only with --step annotate - annotation_cache = false // Annotation cache disabled - cadd_cache = null // CADD cache disabled - cadd_indels = null // No CADD InDels file - cadd_indels_tbi = null // No CADD InDels index - cadd_wg_snvs = null // No CADD SNVs file - cadd_wg_snvs_tbi = null // No CADD SNVs index - genesplicer = null // genesplicer disabled within VEP - snpeff_cache = null // No directory for snpEff cache - vep_cache = null // No directory for VEP cache + bcftools_annotations = null // No extra annotation file + bcftools_annotations_tbi = null // No extra annotation file index + bcftools_columns = null // Use all columns from annotation file + bcftools_header_lines = null // No header lines to be added to the VCF file + condel_config = null // No condel config directory + snpsift_databases = null // No SnpSift annotation databases + dbnsfp = null // No dbnsfp processed file + dbnsfp_consequence = null // No default consequence for dbnsfp plugin + dbnsfp_fields = "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF" // Default fields for dbnsfp plugin + dbnsfp_tbi = null // No dbnsfp processed file index + mastermind_file = null // No mastermind cited variants file + mastermind_mutations = false // Return citations for all mutations/transcripts (default: false) + 
mastermind_var_iden = false // Return only Mastermind variant identifiers (default: false) + mastermind_url = false // Return the built Mastermind URL (default: false) + outdir_cache = null // No default outdir cache + spliceai_indel = null // No spliceai_indel file + spliceai_indel_tbi = null // No spliceai_indel file index + spliceai_snv = null // No spliceai_snv file + spliceai_snv_tbi = null // No spliceai_snv file index + vep_custom_args = "--everything --filter_common --per_gene --total_length --offline --format vcf" // Default arguments for VEP + vep_condel = false // Condel plugin disabled within VEP + vep_dbnsfp = false // dbnsfp plugin disabled within VEP + vep_include_fasta = false // Don't use fasta file for annotation with VEP + vep_loftee = false // loftee plugin disabled within VEP + vep_mastermind = false // mastermind plugin disabled within VEP + vep_phenotypes = false // phenotypes plugin disabled within VEP + phenotypes_file = null + phenotypes_file_tbi = null + phenotypes_include_types = null + vep_out_format = "vcf" + vep_spliceai = null // spliceai plugin disabled within VEP + vep_spliceregion = null // spliceregion plugin disabled within VEP + vep_version = "115.0-0" // Should be updated when we update VEP, needs this to get full path to some plugins // MultiQC options multiqc_config = null multiqc_title = null + multiqc_logo = null max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options - outdir = './results' - tracedir = "${params.outdir}/pipeline_info" - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - help = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' - enable_conda = false + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + + help = false + help_full = false + show_hidden = false + version = false + pipelines_testdata_base_path 
= 'https://raw.githubusercontent.com/nf-core/test-datasets/' + trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') // Config options + config_profile_name = null + config_profile_description = null custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null config_profile_contact = null config_profile_url = null - config_profile_name = null + test_data_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek3' + modules_testdata_base_path = null - // Max resource options - // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' + // Schema validation default options + validate_params = true } +// workflow output +outputDir = params.outdir +workflow.output.mode = params.publish_dir_mode + // Load base.config by default for all pipelines includeConfig 'conf/base.config' -// Load nf-core custom profiles from different Institutions -try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") -} - -// Load nf-core/sarek custom profiles from different Institutions -try { - includeConfig "${params.custom_config_base}/pipeline/sarek.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config/sarek profiles: ${params.custom_config_base}/pipeline/sarek.config") -} - profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true + } conda { - params.enable_conda = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + conda.enabled = true + docker.enabled = 
false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda'] + apptainer.enabled = false + } + mamba { + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' + } + arm64 { + process.arch = 'arm64' + // TODO https://github.com/nf-core/modules/issues/6694 + // For now if you're using arm64 you have to use wave for the sake of the maintainers + // wave profile + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' + } + emulate_amd64 { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = 
false } shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' + } + gpu { + docker.runOptions = '-u $(id -u):$(id -g) --gpus all' + apptainer.runOptions = '--nv' + singularity.runOptions = '--nv' + } + spark { + docker.runOptions = '' + } + + // Basic test profile for CI + test { includeConfig 'conf/test.config' } + test_aws { + includeConfig 'conf/test.config' + params.sentieon_dnascope_model = "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38/Annotation/Sentieon/SentieonDNAscopeModel1.1.model" + } + test_azure { + includeConfig 'conf/test.config' + params.sentieon_dnascope_model = "az://igenomes/Homo_sapiens/GATK/GRCh38/Annotation/Sentieon/SentieonDNAscopeModel1.1.model" + } + // Extra test profiles for full tests on AWS + test_full { includeConfig 'conf/test_full.config' } + test_full_aws { includeConfig 'conf/test_full.config' } + test_full_azure { + includeConfig 'conf/test_full.config' + params.input = 
'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/HCC1395_WXS_somatic_full_test_azure.csv' + params.intervals = 'az://test-data/sarek/S07604624_Padded_Agilent_SureSelectXT_allexons_V6_UTR.bed' + params.igenomes_base = "az://igenomes" } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } + test_full_germline { includeConfig 'conf/test_full_germline.config' } + test_full_germline_aws { includeConfig 'conf/test_full_germline.config' } + test_full_germline_azure { + includeConfig 'conf/test_full_germline.config' + params.input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/NA12878_WGS_30x_full_test_azure.csv' + params.igenomes_base = "az://igenomes" + } + test_full_germline_ncbench_agilent { + includeConfig 'conf/test_full_germline_ncbench_agilent.config' + } + // Extra test profiles for more complete CI + mutect { includeConfig 'conf/test_mutect2.config' } } +// Load nf-core custom profiles from different institutions + +// If params.custom_config_base is set AND either the NXF_OFFLINE environment variable is not set or params.custom_config_base is a local path, the nfcore_custom.config file from the specified base path is included. +// Load nf-core/sarek custom profiles from different institutions. +includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" + + +// Load nf-core/sarek custom profiles from different institutions. +includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? 
"${params.custom_config_base}/pipeline/sarek.config" : "/dev/null" + +// Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' +charliecloud.registry = 'quay.io' + // Load igenomes.config if required -if (!params.igenomes_ignore) { - includeConfig 'conf/igenomes.config' -} else { - params.genomes = [:] -} +includeConfig !params.igenomes_ignore ? 'conf/igenomes.config' : 'conf/igenomes_ignored.config' // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. @@ -201,69 +362,433 @@ env { JULIA_DEPOT_PATH = "/usr/local/share/julia" } -// Capture exit codes from upstream processes when piping -process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Set bash options +process.shell = [ + "bash", + "-C", // No clobber - prevent output redirection from overwriting files. + "-e", // Exit if a tool returns a non-zero status/exit code + "-u", // Treat unset variables and parameters as an error + "-o", // Returns the status of the last command to exit.. + "pipefail" // ..with a non-zero status or zero if all successfully execute +] + +// Disable process selector warnings by default. Use debug profile to enable warnings. 
+nextflow.enable.configProcessNamesValidation = false -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${params.trace_report_suffix}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${params.trace_report_suffix}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${params.trace_report_suffix}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" + file = "${params.outdir}/pipeline_info/pipeline_dag_${params.trace_report_suffix}.html" +} +prov { + enabled = true + formats { + bco { + file = "${params.outdir}/pipeline_info/manifest_${params.trace_report_suffix}.bco.json" + } + } } manifest { name = 'nf-core/sarek' - author = 'Maxime Garcia, Szilveszter Juhos' + contributors = [ + [ + name: ' Friederike Hanssen', + affiliation: 'Seqera', + email: 'friederike.hanssen@seqera.io', + github: '@friederikehanssen', + contribution: ['author', 'maintainer'], + orcid: '0009-0001-9875-5262' + ], + [ + name: 'Maxime U Garcia', + affiliation: 'NGI', + email: 'maxime.garcia@scilifelab.se', + github: '@maxulysse', + contribution: ['author', 'maintainer'], + orcid: '0000-0003-2827-9261' + ], + [ + name: 'Szilveszter Juhos', + github: '@szilvajuhos', + contribution: ['author'], + orcid: '0000-0001-6280-4643' + ], + [ + name: 'Abhinav Sharma', + github: '@abhi18av', + contribution: ['contributor'], + ], + [ + name: 'Adam Talbot', + affiliation: 'Seqera', + github: '@adamrtalbot', + contribution: ['contributor'], + ], + [ + name: 'Adrian Lärkeryd', + github: '@adrlar', + contribution: ['contributor'], + ], + [ + name: 'Àitor Olivares', + github: 
'@AitorPeseta', + contribution: ['contributor'], + ], + [ + name: 'Alexander Peltzer', + github: '@apeltzer', + contribution: ['contributor'], + ], + [ + name: 'Alison Meynert', + github: '@ameynert', + contribution: ['contributor'], + ], + [ + name: 'Anders Sune Pedersen', + github: '@asp8200', + contribution: ['contributor'], + ], + [ + name: 'Aron Skaftason', + github: '@arontommi', + contribution: ['contributor'], + ], + [ + name: 'Barry Digby', + github: '@BarryDigby', + contribution: ['contributor'], + ], + [ + name: 'Bekir Ergüner', + github: '@berguner', + contribution: ['contributor'], + ], + [ + name: 'Björn Nystedt', + github: '@bjornnystedt', + contribution: ['contributor'], + ], + [ + name: 'Christina Chatzipantsiou', + github: '@cgpu', + contribution: ['contributor'], + ], + [ + name: 'Chela James', + github: '@chelauk', + contribution: ['contributor'], + ], + [ + name: 'David Mas-Ponte', + github: '@davidmasp', + contribution: ['contributor'], + ], + [ + name: 'Edmund Miller', + affiliation: 'Seqera', + github: '@edmundmiller', + contribution: ['contributor'], + ], + [ + name: 'Famke Bäuerle', + affiliation: 'Quantitative Biology Center (QBiC) Tübingen, University of Tübingen, Germany', + github: '@famosab', + contribution: ['contributor'], + orcid: '0000-0003-1387-0251' + ], + [ + name: 'Francesco Lescai', + github: '@lescai', + contribution: ['contributor'], + ], + [ + name: 'Francisco Martínez', + github: '@nevinwu', + contribution: ['contributor'], + ], + [ + name: 'Gavin Mackenzie', + github: '@GCJMackenzie', + contribution: ['contributor'], + ], + [ + name: 'Gisela Gabernet', + github: '@ggabernet', + contribution: ['contributor'], + ], + [ + name: 'Grant Neilson', + github: '@grantn5', + contribution: ['contributor'], + ], + [ + name: 'Max Käller', + github: '@gulfshores', + contribution: ['contributor'], + ], + [ + name: 'Harshil Patel', + affiliation: 'Seqera', + github: '@drpatelh', + contribution: ['contributor'], + ], + [ + name: 'Hongwei 
Ye', + github: '@YeHW', + contribution: ['contributor'], + ], + [ + name: 'James A. Fellows Yates', + github: '@jfy133', + contribution: ['contributor'], + ], + [ + name: 'Jesper Eisfeldt', + github: '@J35P312', + contribution: ['contributor'], + ], + [ + name: 'Johannes Alneberg', + github: '@alneberg', + contribution: ['contributor'], + ], + [ + name: 'Jonas Kjellin', + affiliation: 'National Genomics Infrastructure (NGI), Uppsala University, Sweden', + github: '@kjellinjonas', + contribution: ['contributor'], + orcid: '0000-0002-3830-7046' + ], + [ + name: 'José Fernández Navarro', + github: '@jfnavarro', + contribution: ['contributor'], + ], + [ + name: 'Júlia Mir Pedrol', + github: '@mirpedrol', + contribution: ['contributor'], + ], + [ + name: 'Ken Brewer', + affiliation: 'Seqera', + github: '@kenibrewer', + contribution: ['contributor'], + ], + [ + name: 'Lasse Westergaard Folkersen', + github: '@lassefolkersen', + contribution: ['contributor'], + ], + [ + name: 'Lucia Conde', + github: '@lconde-ucl', + contribution: ['contributor'], + ], + [ + name: 'Louis Le Nézet', + github: '@LouisLeNezet', + contribution: ['contributor'], + ], + [ + name: 'Malin Larsson', + github: '@malinlarsson', + contribution: ['contributor'], + ], + [ + name: 'Marcel Martin', + github: '@marcelm', + contribution: ['contributor'], + ], + [ + name: 'Nick Smith', + github: '@nickhsmith', + contribution: ['contributor'], + ], + [ + name: 'Nicolas Schcolnicov', + github: '@nschcolnicov', + contribution: ['contributor'], + ], + [ + name: 'Nilesh Tawari', + github: '@nilesh-tawari', + contribution: ['contributor'], + ], + [ + name: 'Nils Homer', + github: '@nh13', + contribution: ['contributor'], + ], + [ + name: 'Olga Botvinnik', + github: '@olgabot', + contribution: ['contributor'], + ], + [ + name: 'Oskar Wacker', + github: '@WackerO', + contribution: ['contributor'], + ], + [ + name: 'Pall Olason', + github: '@pallolason', + contribution: ['contributor'], + ], + [ + name: 'Paul 
Cantalupo', + github: '@pcantalupo', + contribution: ['contributor'], + ], + [ + name: 'Phil Ewels', + affiliation: 'Seqera', + github: '@ewels', + contribution: ['contributor'], + ], + [ + name: 'Pierre Lindenbaum', + github: '@lindenb', + contribution: ['contributor'], + ], + [ + name: 'Sabrina Krakau', + github: '@skrakau', + contribution: ['contributor'], + ], + [ + name: 'Sam Minot', + github: '@sminot', + contribution: ['contributor'], + ], + [ + name: 'Sebastian DiLorenzo', + github: '@Sebastian-D', + contribution: ['contributor'], + ], + [ + name: 'Silvia Morini', + github: '@silviamorins', + contribution: ['contributor'], + ], + [ + name: 'Simon Pearce', + affiliation: 'NeoGenomics Laboratories', + github: '@SPPearce', + contribution: ['contributor'], + orcid: '0000-0002-1680-5538' + ], + [ + name: 'Solenne Correard', + github: '@scorreard', + contribution: ['contributor'], + ], + [ + name: 'Susanne Jodoin', + github: '@SusiJo', + contribution: ['contributor'], + ], + [ + name: 'Tobias Koch', + github: '@KochTobi', + contribution: ['contributor'], + ], + [ + name: 'Winni Kretzschmar', + github: '@winni2k', + contribution: ['contributor'], + ], + [ + name: 'Patricie Skaláková', + github: '@Patricie34', + contribution: ['contributor'], + ] + ] homePage = 'https://github.com/nf-core/sarek' - description = 'An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing' + icon = './docs/images/sarek_icon.png' + description = """An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing""" mainScript = 'main.nf' - nextflowVersion = '!>=21.10.3' - version = '3.0dev' + defaultBranch = 'master' + nextflowVersion = '!>=25.10.2' + version = '3.9.0dev' + doi = '10.12688/f1000research.16665.2, 10.1093/nargab/lqae031, 10.5281/zenodo.3476425' } -// Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' - -// Function to ensure that 
resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj - } - } +// Nextflow plugins +plugins { + id 'nf-core-utils@0.4.0' // nf-core-utils is a collection of utilities for Nextflow pipelines + id 'nf-fgbio@1.0.0' // Validation of read structures passed to fgbio consensus generation + id 'nf-prov@1.2.2' // Provenance reports for pipeline runs + id 'nf-schema@2.6.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet } + +validation { + defaultIgnoreParams = ["genomes", "freebayes_filter", "vep_cache_version"] + monochromeLogs = params.monochrome_logs +} + +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules/modules.config' + +// Load more modules specific config for DSL2 module specific options + +// prepare reference +includeConfig 'conf/modules/download_cache.config' +includeConfig 'conf/modules/prepare_genome.config' +includeConfig 'conf/modules/prepare_intervals.config' + +// preprocessing +includeConfig 'conf/modules/aligner.config' +includeConfig 'conf/modules/aligner_parabricks.config' +includeConfig 
'conf/modules/alignment_to_fastq.config' +includeConfig 'conf/modules/contamination.config' +includeConfig 'conf/modules/markduplicates.config' +includeConfig 'conf/modules/sentieon_dedup.config' +includeConfig 'conf/modules/prepare_recalibration.config' +includeConfig 'conf/modules/recalibrate.config' +includeConfig 'conf/modules/trimming.config' +includeConfig 'conf/modules/umi.config' + +//ngscheckmate +includeConfig 'conf/modules/ngscheckmate.config' + +// variant calling +includeConfig 'conf/modules/ascat.config' +includeConfig 'conf/modules/cnvkit.config' +includeConfig 'conf/modules/controlfreec.config' +includeConfig 'conf/modules/deepvariant.config' +includeConfig 'conf/modules/freebayes.config' +includeConfig 'conf/modules/haplotypecaller.config' +includeConfig 'conf/modules/indexcov.config' +includeConfig 'conf/modules/joint_germline.config' +includeConfig 'conf/modules/manta.config' +includeConfig 'conf/modules/mpileup.config' +includeConfig 'conf/modules/msisensorpro.config' +includeConfig 'conf/modules/msisensor2.config' +includeConfig 'conf/modules/muse.config' +includeConfig 'conf/modules/mutect2.config' +includeConfig 'conf/modules/sentieon_dnascope.config' +includeConfig 'conf/modules/sentieon_haplotyper.config' +includeConfig 'conf/modules/sentieon_joint_germline.config' +includeConfig 'conf/modules/sentieon_tnscope.config' +includeConfig 'conf/modules/strelka.config' +includeConfig 'conf/modules/tiddit.config' +includeConfig 'conf/modules/post_variant_calling.config' +includeConfig 'conf/modules/lofreq.config' +includeConfig 'conf/modules/varlociraptor.config' + +//annotate +includeConfig 'conf/modules/annotate.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index b1e09b6135..929a9d98d0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,327 +1,570 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": 
"https://raw.githubusercontent.com/nf-core/sarek/master/nextflow_schema.json", "title": "nf-core/sarek pipeline parameters", "description": "An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing", "type": "object", - "definitions": { + "$defs": { "input_output_options": { "title": "Input/output options", "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "step" - ], + "help_text": "Specify input samplesheet, step and output folder.", + "required": ["step", "outdir"], "properties": { + "input": { + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "A design file with information about the samples in your experiment. Use this parameter to specify the location of the input files. It has to be a comma-separated file with a header row. See [usage docs](https://nf-co.re/sarek/usage#input).\n\nIf no input file is specified, sarek will attempt to locate one in the `{outdir}` directory. If no input should be supplied, i.e. 
when --step is supplied or --build_only_index, then set --input false", + "fa_icon": "fas fa-file-csv", + "type": "string", + "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", + "pattern": "^\\S+\\.(csv|tsv|yaml|yml|json)$" + }, + "input_restart": { + "type": "string", + "description": "Automatic retrieval for restart", + "fa_icon": "fas fa-file-csv", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.(csv|tsv|yaml|yml|json)$", + "hidden": true, + "schema": "assets/schema_input.json" + }, "step": { "type": "string", "default": "mapping", "fa_icon": "fas fa-play", - "description": "Starting step.", - "help_text": "Only one step", + "description": "Starting step", + "help_text": "The pipeline starts from this step and then runs through the possible subsequent steps.", "enum": [ "mapping", + "markduplicates", "prepare_recalibration", "recalibrate", "variant_calling", - "annotate", - "controlfreec" + "annotate" ] }, - "input": { - "type": "string", - "format": "file-path", - "mimetype": "text/csv", - "pattern": "\\.csv$", - "schema": "assets/schema_input.json", - "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with a header row. See [usage docs](https://nf-co.re/sarek/usage#input).", - "fa_icon": "fas fa-file-csv" - }, "outdir": { "type": "string", - "description": "Path to the output directory where the results will be saved.", - "default": "./results", + "format": "directory-path", + "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + } - }, - "help_text": "" + } }, "main_options": { "title": "Main options", "type": "object", - "description": "Option used for most of the pipeline", + "description": "Most common options used for the pipeline", "default": "", "properties": { - "tools": { + "split_fastq": { + "type": "integer", + "oneOf": [ + { + "minimum": 250 + }, + { + "const": 0 + } + ], + "default": 50000000, + "fa_icon": "fas fa-clock", + "description": "Specify how many reads each split of a FastQ file contains. Set 0 to turn off splitting at all.", + "help_text": "Use the tool FastP to split FASTQ file by number of reads. This parallelizes across fastq file shards speeding up mapping. Note although the minimum value is 250 reads, if you have fewer than 250 reads a single FASTQ shard will still be created." + }, + "nucleotides_per_second": { + "type": "integer", + "fa_icon": "fas fa-clock", + "description": "Estimate interval size.", + "help_text": "Intervals are parts of the chopped up genome used to speed up preprocessing and variant calling. See `--intervals` for more info. \n\nChanging this parameter, changes the number of intervals that are grouped and processed together. Bed files from target sequencing can contain thousands of small intervals. Spinning up a new process for each can be quite resource intensive. Instead it can be desired to process small intervals together on larger nodes. \nIn order to make use of this parameter, no runtime estimate can be present in the bed file (column 5). 
", + "default": 200000, + "minimum": 1 + }, + "intervals": { "type": "string", - "fa_icon": "fas fa-toolbox", - "description": "Tools to use for variant calling and/or for annotation.", - "help_text": "Multiple separated with commas.\n\nGermline variant calling can currently only be performed with the following variant callers:\n- FreeBayes, HaplotypeCaller, Manta, mpileup, Strelka, TIDDIT\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- ASCAT, Control-FREEC, FreeBayes, Manta, MSIsensorpro, Mutect2, Strelka\n\nTumor-only somatic variant calling can currently only be performed with the following variant callers:\n- Control-FREEC, Manta, mpileup, Mutect2, TIDDIT\n\nAnnotation is done using snpEff, VEP, or even both consecutively.\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted.\n\n\n\n`DNAseq`, `DNAscope` and `TNscope` are only available with `--sentieon`\n\n> **NB** tools can be specified with no concern for case.\n", - "pattern": "^((ascat|cnvkit|controlfreec|deepvariant|dnascope|dnaseq|freebayes|haplotypecaller|manta|merge|mpileup|msisensorpro|mutect2|snpeff|strelka|tiddit|tnscope|vep)*,?)*$" + "fa_icon": "fas fa-file-alt", + "format": "file-path", + "pattern": "\\S+\\.(bed|interval_list)$", + "exists": true, + "help_text": "To speed up preprocessing and variant calling processes, the execution is parallelized across a reference chopped into smaller pieces.\n\nParts of preprocessing and variant calling are done by these intervals, the different resulting files are then merged.\nThis can parallelize processes, and push down wall clock time significantly.\n\nWe are aligning to the whole genome, and then run Base Quality Score Recalibration and Variant Calling on the supplied regions.\n\n**Whole Genome Sequencing:**\n\nThe (provided) intervals are chromosomes cut at their centromeres (so each chromosome arm processed separately) also additional 
unassigned contigs.\n\nWe are ignoring the `hs37d5` contig that contains concatenated decoy sequences.\n\nThe calling intervals can be defined using a .list or a BED file.\nA .list file contains one interval per line in the format `chromosome:start-end` (1-based coordinates).\nA BED file must be a tab-separated text file with one interval per line.\nThere must be at least three columns: chromosome, start, and end (0-based coordinates).\nAdditionally, the score column of the BED file can be used to provide an estimate of how many seconds it will take to call variants on that interval.\nThe fourth column remains unused.\n\n```\n|chr1|10000|207666|NA|47.3|\n```\nThis indicates that variant calling on the interval chr1:10001-207666 takes approximately 47.3 seconds.\n\nThe runtime estimate is used in two different ways.\nFirst, when there are multiple consecutive intervals in the file that take little time to compute, they are processed as a single job, thus reducing the number of processes that needs to be spawned.\nSecond, the jobs with largest processing time are started first, which reduces wall-clock time.\nIf no runtime is given, a time of 200000 nucleotides per second is assumed. See `--nucleotides_per_second` on how to customize this.\nActual figures vary from 2 nucleotides/second to 30000 nucleotides/second.\nIf you prefer, you can specify the full path to your reference genome when you run the pipeline:\n\n> **NB** If none provided, will be generated automatically from the FASTA reference\n> **NB** Use --no_intervals to disable automatic generation.\n\n**Targeted Sequencing:**\n\nThe recommended flow for targeted sequencing data is to use the workflow as it is, but also provide a `BED` file containing targets for all steps using the `--intervals` option. 
In addition, the parameter `--wes` should be set.\nIt is advised to pad the variant calling regions (exons or target) to some extent before submitting to the workflow.\n\nThe procedure is similar to whole genome sequencing, except that only BED files are accepted. See above for formatting description.\nAdding every exon as an interval in case of `WES` can generate >200K processes or jobs, much more forks, and similar number of directories in the Nextflow work directory. These are appropriately grouped together to reduce number of processes run in parallel (see above and `--nucleotides_per_second` for details). \nFurthermore, primers and/or baits are not 100% specific, (certainly not for MHC and KIR, etc.), quite likely there are going to be reads mapping to multiple locations.\nIf you are certain that the target is unique for your genome (all the reads will certainly map to only one location), and aligning to the whole genome is an overkill, it is actually better to change the reference itself.", + "description": "Path to target bed file in case of whole exome or targeted sequencing or intervals file." }, "no_intervals": { "type": "boolean", "fa_icon": "fas fa-ban", "description": "Disable usage of intervals.", - "help_text": "Intervals are part of the genome chopped up, used to speed up preprocessing and variant calling" + "help_text": "Intervals are parts of the chopped up genome used to speed up preprocessing and variant calling. See `--intervals` for more info. \n\nIf `--no_intervals` is set no intervals will be taken into account for speed up or data processing." 
}, - "nucleotides_per_second": { - "type": "number", - "fa_icon": "fas fa-clock", - "description": "Estimate interval size.", - "help_text": "Intervals are part of the genome chopped up, used to speed up preprocessing and variant calling", - "default": 1000 - }, - "sentieon": { + "wes": { "type": "boolean", - "fa_icon": "fas fa-tools", - "description": "Enable Sentieon if available.", - "help_text": "Sentieon is a commercial solution to process genomics data with high computing efficiency, fast turnaround time, exceptional accuracy, and 100% consistency.\n\n> **NB** Adds the following tools for the `--tools` options: `DNAseq`, `DNAscope` and `TNscope`." + "fa_icon": "fas fa-dna", + "description": "Enable when exome or panel data is provided.", + "help_text": "With this parameter flags in various tools are set for targeted sequencing data. It is recommended to enable for whole-exome and panel data analysis." }, - "skip_qc": { + "tools": { "type": "string", - "fa_icon": "fas fa-forward", - "description": "Disable specified QC and Reporting tools.", - "help_text": "Multiple tools can be specified, separated by commas.\n\n> **NB** `--skip_qc BaseRecalibrator` is actually just not saving the reports.\n> **NB** `--skip_qc MarkDuplicates` does not skip `MarkDuplicates` but prevent the collection of duplicate metrics that slows down performance.\n> **NB** tools can be specified with no concern for case.", - "pattern": "^((bamqc|baserecalibrator|bcftools|documentation|fastqc|markduplicates|multiqc|samtools|vcftools|versions|deeptools)*(,)*)*$" + "fa_icon": "fas fa-toolbox", + "description": "Tools to use for contamination removal, duplicate marking, variant calling and/or for annotation.", + "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper \n- Structural Variants: 
indexcov, Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Lofreq, mpileup, Mutect2, Sentieon TNScope, Strelka\n- Structural Variants: Manta, Sentieon TNScope, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Sentieon TNScope, Strelka2 \n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC, Sentieon TNScope \n- Microsatellite Instability: MSIsensor2, MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.", + "pattern": "^((ascat|bbsplit|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|indexcov|lofreq|manta|merge|mpileup|msisensor2|msisensorpro|muse|mutect2|ngscheckmate|sentieon_dedup|sentieon_dnascope|sentieon_haplotyper|sentieon_tnscope|snpeff|snpsift|strelka|tiddit|vep|varlociraptor)?,?)*(?200K processes or jobs, much more forks, and similar number of directories in the Nextflow work directory.\nFurthermore, primers and/or baits are not 100% specific, (certainly not for MHC and KIR, etc.), quite likely there going to be reads mapping to multiple locations.\nIf you are certain that the target is unique for your genome (all the reads will certainly map to only one location), and aligning to the whole genome is an overkill, it is actually better to change the reference itself.\n\nThe recommended flow for targeted sequencing data is to use the workflow as it is, but also provide a `BED` file containing targets for all steps using the `--target_bed` option.\nThe 
workflow will pick up these intervals, and activate any `--exome` flag in any tools that allow it to process deeper coverage.\nIt is advised to pad the variant calling regions (exons or target) to some extent before submitting to the workflow." - }, - "wes": { - "type": "boolean", - "fa_icon": "fas fa-dna", - "description": "Enable when exome or panel data is provided" + "fa_icon": "fas fa-forward", + "description": "Disable specified tools.", + "help_text": "Multiple tools can be specified, separated by commas.\n\n> **NB** `--skip_tools baserecalibrator_report` is actually just not saving the reports.\n> **NB** `--skip_tools markduplicates_report` does not skip `MarkDuplicates` but prevent the collection of duplicate metrics that slows down performance.", + "pattern": "^((baserecalibrator|baserecalibrator_report|bcftools|dnascope_filter|documentation|fastqc|haplotypecaller_filter|haplotyper_filter|markduplicates|markduplicates_report|mosdepth|multiqc|samtools|vcftools|versions)?,?)*(? **WARNING** Current indices for `bwa` in AWS iGenomes are not compatible with `bwa-mem2`.\n> Use `--bwa=false` to have `Sarek` build them automatically.\n\n> **WARNING** BWA-mem2 is in active development\n> Sarek might not be able to require the right amount of resources for it at the moment\n> We recommend to use pre-built indexes", - "hidden": true + "help_text": "Sarek will build missing indices automatically if not provided. Set `--bwa false` if indices should be (re-)built.\nIf DragMap is selected as aligner, it is recommended to skip baserecalibration with `--skip_tools baserecalibrator`. For more info see [here](https://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode)." 
}, - "markdup_java_options": { - "type": "string", - "default": "\"-Xms4000m -Xmx7g\"", - "fa_icon": "fas fa-memory", - "description": "Establish values for GATK MarkDuplicates memory consumption", - "help_text": "See [SciLifeLab/Sarek/pull/689](https://github.com/SciLifeLab/Sarek/pull/689)", - "hidden": true + "save_mapped": { + "type": "boolean", + "fa_icon": "fas fa-download", + "description": "Save mapped files.", + "help_text": "If the parameter `--split_fastq` is used, the sharded bam files are merged and converted to CRAM before saving them." + }, + "save_output_as_bam": { + "type": "boolean", + "description": "Saves output from mapping (if `--save_mapped`), Markduplicates & Baserecalibration as BAM file instead of CRAM", + "fa_icon": "fas fa-download" }, "use_gatk_spark": { "type": "string", "fa_icon": "fas fa-forward", - "description": "Tools for which to enable usage of GATK Spark implementation", - "help_text": "Multiple separated with commas.\n\n GATK4 BQSR tools are currently only available as Beta release. Use with caution!", - "pattern": "^((markduplicates|bqsr)*,?)*$" + "description": "Enable usage of GATK Spark implementation for duplicate marking and/or base quality score recalibration", + "help_text": "Multiple separated with commas.\n\n> The GATK4 Base Quality Score recalibration tools `Baserecalibrator` and `ApplyBQSR` are currently available as Beta release. Please be aware that `--use_gatk_spark` is not compatible with `--save_output_as_bam --save_mapped`. Use with caution!", + "pattern": "^((baserecalibrator|markduplicates)?,?)*(? 20) or 1/1000 (QUAL > 30). Where the default setting for sarek is QUAL > 30." 
+ }, + "joint_germline": { "type": "boolean", - "fa_icon": "fas fa-copy", - "description": "Generate g.vcf output from GATK HaplotypeCaller" + "fa_icon": "fas fa-toolbox", + "description": "Turn on the joint germline variant calling for GATK haplotypecaller", + "help_text": "Uses all normal germline samples (as designated by `status` in the input csv) in the joint germline variant calling process." + }, + "joint_mutect2": { + "type": "boolean", + "fa_icon": "fas fa-angle-double-right", + "description": "Runs Mutect2 in joint (multi-sample) mode for better concordance among variant calls of tumor samples from the same patient. Mutect2 outputs will be stored in a subfolder named with patient ID under `variant_calling/mutect2/` folder. Only a single normal sample per patient is allowed. Tumor-only mode is also supported." }, - "no_strelka_bp": { + "ignore_soft_clipped_bases": { "type": "boolean", "fa_icon": "fas fa-ban", - "description": "Will not use Manta candidateSmallIndels for Strelka", - "help_text": "Not recommended by Best Practices" + "description": "Do not analyze soft clipped bases in the reads for GATK Mutect2.", + "help_text": "use the `--dont-use-soft-clipped-bases` params with GATK Mutect2." }, "pon": { "type": "string", "fa_icon": "fas fa-file", - "description": "Panel-of-normals VCF (bgzipped) for GATK Mutect2 / Sentieon TNscope", - "help_text": "Without PON, there will be no calls with PASS in the INFO field, only an unfiltered VCF is written.\nIt is recommended to make your own PON, as it depends on sequencer and library preparation.\nFor tests in iGenomes there is a dummy PON file in the Annotation/GermlineResource directory, but it should not be used as a real PON file.\n\nSee [PON documentation](https://gatk.broadinstitute.org/hc/en-us/articles/360042479112-CreateSomaticPanelOfNormals-BETA)\n> **NB** PON file should be bgzipped." 
+ "format": "file-path", + "pattern": "^\\S+\\.vcf\\.gz$", + "exists": true, + "description": "Panel-of-normals VCF (bgzipped) for GATK Mutect2", + "help_text": "Without PON, there will be no calls with PASS in the INFO field, only an unfiltered VCF is written.\nIt is highly recommended to make your own PON, as it depends on sequencer and library preparation.\n\nThe pipeline is shipped with a panel-of-normals for `--genome GATK.GRCh38` provided by [GATK](https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-). \n\nSee [PON documentation](https://gatk.broadinstitute.org/hc/en-us/articles/360042479112-CreateSomaticPanelOfNormals-BETA)\n> **NB** PON file should be bgzipped." }, "pon_tbi": { "type": "string", "fa_icon": "fas fa-file", - "description": "Index of PON panel-of-normals VCF", + "format": "file-path", + "pattern": "^\\S+\\.vcf\\.gz\\.tbi$", + "exists": true, + "description": "Index of PON panel-of-normals VCF.", "help_text": "If none provided, will be generated automatically from the PON bgzipped VCF file." }, - "ignore_soft_clipped_bases": { + "sentieon_haplotyper_emit_mode": { + "type": "string", + "default": "variant", + "description": "Option for selecting output and emit-mode of Sentieon's Haplotyper.", + "fa_icon": "fas fa-toolbox", + "help_text": "The option `--sentieon_haplotyper_emit_mode` can be set to the same string values as the Haplotyper's `--emit_mode`. To output both a vcf and a gvcf, specify both a vcf-option (currently, `all`, `confident` and `variant`) and `gvcf`. For example, to obtain a vcf and gvcf one could set `--sentieon_haplotyper_emit_mode` to `variant, gvcf`.", + "pattern": "^(all|confident|gvcf|variant|gvcf,all|gvcf,confident|gvcf,variant|all,gvcf|confident,gvcf|variant,gvcf)(? **NB** If none provided, will be generated automatically from the FASTA reference." 
+ "help_text": "If you wish to recompute indices available on igenomes, set `--bwa false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." }, - "chr_dir": { + "bwamem2": { "type": "string", - "fa_icon": "fas fa-folder-open", - "description": "Path to chromosomes folder." + "format": "directory-path", + "exists": true, + "fa_icon": "fas fa-copy", + "description": "Path to bwa-mem2 mem indices.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nIf you wish to recompute indices available on igenomes, set `--bwamem2 false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner bwa-mem2` is specified. Combine with `--save_reference` to save for future runs." }, - "chr_length": { + "chr_dir": { "type": "string", - "fa_icon": "fas fa-file", - "description": "Path to chromosomes length file." + "format": "path", + "exists": true, + "fa_icon": "fas fa-folder-open", + "description": "Path to chromosomes folder used with ControLFREEC.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, "dbsnp": { "type": "string", + "format": "file-path", + "pattern": "^\\S+\\.vcf\\.gz$", + "exists": true, "fa_icon": "fas fa-file", - "description": "Path to dbsnp file." + "description": "Path to dbsnp file.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, "dbsnp_tbi": { "type": "string", + "format": "file-path", + "pattern": "^\\S+\\.vcf\\.gz\\.tbi$", + "exists": true, "fa_icon": "fas fa-file", "description": "Path to dbsnp index.", - "help_text": "> **NB** If none provided, will be generated automatically from the dbsnp file." + "help_text": "> **NB** If none provided, will be generated automatically from the dbsnp file. 
Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + }, + "dbsnp_vqsr": { + "type": "string", + "fa_icon": "fas fa-copy", + "description": "Label string for VariantRecalibration (haplotypecaller joint variant calling).\n\nIf you use AWS iGenomes, this has already been set for you appropriately." }, "dict": { "type": "string", + "format": "file-path", + "pattern": "^\\S+\\.dict$", + "exists": true, "fa_icon": "fas fa-file", "description": "Path to FASTA dictionary file.", - "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference." + "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + }, + "dragmap": { + "type": "string", + "format": "directory-path", + "exists": true, + "fa_icon": "fas fa-copy", + "description": "Path to dragmap indices.", + "help_text": "If you wish to recompute indices available on igenomes, set `--dragmap false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner dragmap` is specified. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." }, "fasta": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", - "pattern": "\\.fn?a(sta)?(\\.gz)?$", + "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. 
Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code" + "help_text": "This parameter is *mandatory* if `--genome` is not specified.\n\nIf you use AWS iGenomes, this has already been set for you appropriately.", + "fa_icon": "fas fa-file" }, "fasta_fai": { "type": "string", "fa_icon": "fas fa-file", - "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately.", "description": "Path to FASTA reference index." }, "germline_resource": { "type": "string", "fa_icon": "fas fa-file", - "description": "Path to GATK Mutect2 Germline Resource File", - "help_text": "The germline resource VCF file (bgzipped and tabixed) needed by GATK4 Mutect2 is a collection of calls that are likely present in the sample, with allele frequencies.\nThe AF info field must be present.\nYou can find a smaller, stripped gnomAD VCF file (most of the annotation is removed and only calls signed by PASS are stored) in the AWS iGenomes Annotation/GermlineResource folder." + "format": "file-path", + "pattern": "\\S+\\.vcf\\.gz$", + "mimetype": "text/plain", + "description": "Path to GATK Mutect2 Germline Resource File.", + "help_text": "The germline resource VCF file (bgzipped and tabixed) needed by GATK4 Mutect2 is a collection of calls that are likely present in the sample, with allele frequencies.\nThe AF info field must be present.\nYou can find a smaller, stripped gnomAD VCF file (most of the annotation is removed and only calls signed by PASS are stored) in the AWS iGenomes Annotation/GermlineResource folder.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." 
}, "germline_resource_tbi": { "type": "string", "fa_icon": "fas fa-file", - "description": "Path to GATK Mutect2 Germline Resource Index", - "help_text": "> **NB** If none provided, will be generated automatically from the Germline Resource file, if provided" - }, - "intervals": { - "type": "string", - "fa_icon": "fas fa-file-alt", - "help_text": "To speed up some preprocessing and variant calling processes, the reference is chopped into smaller pieces.\nThe intervals are chromosomes cut at their centromeres (so each chromosome arm processed separately) also additional unassigned contigs.\nWe are ignoring the `hs37d5` contig that contains concatenated decoy sequences.\nParts of preprocessing and variant calling are done by these intervals, and the different resulting files are then merged.\nThis can parallelize processes, and push down wall clock time significantly.\n\nThe calling intervals can be defined using a .list or a BED file.\nA .list file contains one interval per line in the format `chromosome:start-end` (1-based coordinates).\nA BED file must be a tab-separated text file with one interval per line.\nThere must be at least three columns: chromosome, start, and end (0-based coordinates).\nAdditionally, the score column of the BED file can be used to provide an estimate of how many seconds it will take to call variants on that interval.\nThe fourth column remains unused.\n\n```\n|chr1|10000|207666|NA|47.3|\n```\nThis indicates that variant calling on the interval chr1:10001-207666 takes approximately 47.3 seconds.\n\nThe runtime estimate is used in two different ways.\nFirst, when there are multiple consecutive intervals in the file that take little time to compute, they are processed as a single job, thus reducing the number of processes that needs to be spawned.\nSecond, the jobs with largest processing time are started first, which reduces wall-clock time.\nIf no runtime is given, a time of 1000 nucleotides per second is assumed.\nActual figures vary 
from 2 nucleotides/second to 30000 nucleotides/second.\nIf you prefer, you can specify the full path to your reference genome when you run the pipeline:\n\n> **NB** If none provided, will be generated automatically from the FASTA reference\n> **NB** Use --no_intervals to disable automatic generation", - "description": "Path to intervals file" + "pattern": "\\S+\\.vcf\\.gz\\.tbi$", + "format": "file-path", + "mimetype": "text/plain", + "description": "Path to GATK Mutect2 Germline Resource Index.", + "help_text": "> **NB** If none provided, will be generated automatically from the Germline Resource file, if provided. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." }, "known_indels": { "type": "string", "fa_icon": "fas fa-copy", - "description": "Path to known indels file" + "format": "file-path-pattern", + "exists": true, + "mimetype": "text/plain", + "description": "Path to known indels file.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, "known_indels_tbi": { "type": "string", "fa_icon": "fas fa-copy", - "description": "Path to known indels file index", - "help_text": "> **NB** If none provided, will be generated automatically from the known index file, if provided" + "format": "file-path-pattern", + "exists": true, + "mimetype": "text/plain", + "description": "Path to known indels file index.", + "help_text": "> **NB** If none provided, will be generated automatically from the known index file, if provided. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + }, + "known_indels_vqsr": { + "type": "string", + "fa_icon": "fas fa-book", + "description": "Label string for VariantRecalibration (haplotypecaller joint variant calling). If you use AWS iGenomes, this has already been set for you appropriately." 
+ }, + "known_snps": { + "type": "string", + "fa_icon": "fas fa-copy", + "format": "file-path", + "pattern": "^\\S+\\.vcf\\.gz$", + "exists": true, + "mimetype": "text/plain", + "description": "Path to known snps file.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." + }, + "known_snps_tbi": { + "type": "string", + "fa_icon": "fas fa-copy", + "format": "file-path", + "pattern": "^\\S+\\.vcf\\.gz\\.tbi$", + "exists": true, + "mimetype": "text/plain", + "description": "Path to known snps file index.", + "help_text": "> **NB** If none provided, will be generated automatically from the known snps file, if provided. Combine with `--save_reference` to save for future runs.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + }, + "known_snps_vqsr": { + "type": "string", + "fa_icon": "fas fa-book", + "description": "Label string for VariantRecalibration (haplotypecaller joint variant calling). If you use AWS iGenomes, this has already been set for you appropriately." }, "mappability": { "type": "string", "fa_icon": "fas fa-file", - "description": "Path to Control-FREEC mappability file" + "format": "file-path", + "pattern": "^\\S+\\.gem$", + "mimetype": "text/plain", + "description": "Path to Control-FREEC mappability file.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, - "snpeff_db": { + "msisensor2_models": { "type": "string", - "fa_icon": "fas fa-database", - "description": "snpEff DB version" + "format": "path", + "exists": true, + "fa_icon": "fas fa-folder-open", + "description": "Path to models folder used with MSIsensor2.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." 
}, - "vep_genome": { + "msisensorpro_scan": { "type": "string", - "fa_icon": "fas fa-microscope", - "description": "VEP genome", - "help_text": "If you use AWS iGenomes or a local resource with genomes.conf, this has already been set for you appropriately." + "format": "path", + "exists": true, + "fa_icon": "fas fa-folder-open", + "description": "Path to scan file used with MSIsensorPro.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, - "vep_species": { + "ngscheckmate_bed": { "type": "string", - "fa_icon": "fas fa-microscope", - "description": "VEP species", - "help_text": "If you use AWS iGenomes or a local resource with genomes.conf, this has already been set for you appropriately." + "fa_icon": "fas fa-file", + "format": "file-path", + "pattern": "^\\S+\\.bed$", + "mimetype": "text/plain", + "description": "Path to SNP bed file for sample checking with NGSCheckMate", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, - "vep_cache_version": { + "sentieon_dnascope_model": { "type": "string", - "fa_icon": "fas fa-tag", - "description": "VEP cache version" + "fa_icon": "fas fa-file", + "format": "file-path", + "pattern": "^\\S+\\.model$", + "mimetype": "text/plain", + "description": "Machine learning model for Sentieon Dnascope.", + "help_text": " It is recommended to use DNAscope with a machine learning model to perform variant calling with higher accuracy by improving the candidate detection and filtering. Sentieon can provide you with a model trained using a subset of the data from the GiAB truth-set found in https://github.com/genome-in-a-bottle. In addition, Sentieon can assist you in the creation of models using your own data, which will calibrate the specifics of your sequencing and bio-informatics processing.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." 
 }, - "save_reference": { - "type": "boolean", - "fa_icon": "fas fa-download", - "description": "Save built references" + "snpeff_cache": { + "type": "string", + "format": "directory-path", + "fa_icon": "fas fa-cloud-download-alt", + "default": "s3://annotation-cache/snpeff_cache/", + "description": "Path to snpEff cache.", + "help_text": "Path to snpEff cache which should contain the relevant genome and build directory in the path ${snpeff_species}.${snpeff_version}\n\nIf you use AWS iGenomes, this has already been set for you appropriately." }, - "igenomes_base": { + "snpeff_db": { + "type": "string", + "fa_icon": "fas fa-database", + "description": "snpEff DB version.", + "help_text": "This is used to specify the database to be used for annotation.\nAlternatively, database names can be listed with the `snpEff databases` command.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + }, + "vep_cache": { "type": "string", "format": "directory-path", - "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes", - "fa_icon": "fas fa-cloud-download-alt" + "fa_icon": "fas fa-cloud-download-alt", + "default": "s3://annotation-cache/vep_cache/", + "description": "Path to VEP cache.", + "help_text": "Path to VEP cache which should contain the relevant species, genome and build directories at the path ${vep_species}/${vep_genome}_${vep_cache_version}\n\nIf you use AWS iGenomes, this has already been set for you appropriately." 
}, - "genomes_base": { + "vep_cache_version": { "type": "string", - "fa_icon": "fas fa-map-marker-alt", - "description": "Directory / URL base for genomes references.", - "help_text": "All files are supposed to be in the same folder" + "fa_icon": "fas fa-tag", + "description": "VEP cache version.", + "help_text": "Alternative cache version can be used to specify the correct Ensembl Genomes version number as these differ from the concurrent Ensembl/VEP version numbers.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." }, - "igenomes_ignore": { - "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "help_text": "Do not load `igenomes.config` when running the pipeline.\nYou may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.\nThis option will load the `genomes.config` file instead.\n\n> **NB** You can then specify the genome custom and specify at least a FASTA genome file." + "vep_genome": { + "type": "string", + "fa_icon": "fas fa-microscope", + "description": "VEP genome.", + "help_text": "This is used to specify the genome when looking for local cache, or cloud based cache.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." + }, + "vep_species": { + "type": "string", + "fa_icon": "fas fa-microscope", + "description": "VEP species.", + "help_text": "Alternatively species listed in Ensembl Genomes caches can be used.\n\nIf you use AWS iGenomes, this has already been set for you appropriately." 
} }, - "help_text": "The pipeline config files come bundled with paths to the Illumina iGenomes reference index files.\nThe configuration is set up to use the AWS-iGenomes resource\ncf https://ewels.github.io/AWS-iGenomes/\n" + "help_text": "The pipeline config files come bundled with paths to the Illumina iGenomes reference index files.\nThe configuration is set up to use the AWS-iGenomes resource\ncf https://ewels.github.io/AWS-iGenomes/." }, "institutional_config_options": { "title": "Institutional config options", @@ -598,47 +1190,31 @@ "hidden": true, "fa_icon": "fas fa-users-cog" }, - "sequencing_center": { + "test_data_base": { "type": "string", - "fa_icon": "fas fa-university", - "description": "Name of sequencing center to be displayed in BAM file", - "help_text": "It will be in the CN field", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/sarek3", + "description": "Base path / URL for data used in the test profiles", + "help_text": "Warning: The `-profile test` samplesheet file itself contains remote paths. Setting this parameter does not alter the contents of that file.", "hidden": true - } - } - }, - "max_job_request_options": { - "title": "Max job request options", - "type": "object", - "fa_icon": "fab fa-acquisitions-incorporated", - "description": "Set the top limit for requested resources for any single job.", - "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. 
See [the nf-core website](https://nf-co.re/usage/configuration) for details.", - "properties": { - "max_cpus": { - "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 16, - "fa_icon": "fas fa-microchip", - "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" }, - "max_memory": { + "modules_testdata_base_path": { "type": "string", - "description": "Maximum amount of memory that can be requested for any single job.", - "default": "128.GB", - "fa_icon": "fas fa-memory", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" + "description": "Base path / URL for data used in the modules", + "hidden": true }, - "max_time": { + "seq_center": { "type": "string", - "description": "Maximum amount of time that can be requested for any single job.", - "default": "240.h", - "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", - "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" + "fa_icon": "fas fa-university", + "description": "Sequencing center information to be added to read group (CN field).", + "hidden": true + }, + "seq_platform": { + "type": "string", + "fa_icon": "fas fa-university", + "default": "ILLUMINA", + "description": "Sequencing platform information to be added to read group (PL field).", + "help_text": "Default: ILLUMINA. 
Will be used to create a proper header for further GATK4 downstream analysis.", + "hidden": true } } }, @@ -649,12 +1225,21 @@ "description": "Less common options for the pipeline, typically set in a config file.", "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", "properties": { - "help": { + "version": { "type": "boolean", - "description": "Display help text.", + "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", "hidden": true }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, "email": { "type": "string", "description": "Email address for completion summary.", @@ -697,17 +1282,22 @@ }, "multiqc_config": { "type": "string", + "format": "file-path", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true }, - "tracedir": { + "multiqc_logo": { "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", + "description": "Custom logo file to supply to MultiQC. 
File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", "hidden": true }, + "multiqc_methods_description": { + "type": "string", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -715,52 +1305,70 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "hidden": true + }, + "trace_report_suffix": { + "type": "string", + "fa_icon": "far calendar", + "description": "Suffix to add to the trace report filename. Default is the date and time in the format yyyy-MM-dd_HH-mm-ss.", + "hidden": true + }, + "help": { + "type": ["boolean", "string"], + "description": "Display the help message." + }, + "help_full": { "type": "boolean", - "fa_icon": "far fa-eye-slash", - "description": "Show all params when using `--help`", - "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + "description": "Display the full detailed help message." }, - "enable_conda": { + "show_hidden": { "type": "boolean", - "description": "Run this workflow with Conda. You can also use '-profile conda' instead of providing this parameter.", - "hidden": true, - "fa_icon": "fas fa-bacon" + "description": "Display hidden parameters in the help message (only works when --help or --help_full are provided)." 
} } } }, "allOf": [ { - "$ref": "#/definitions/input_output_options" + "$ref": "#/$defs/input_output_options" + }, + { + "$ref": "#/$defs/main_options" + }, + { + "$ref": "#/$defs/fastq_preprocessing" }, { - "$ref": "#/definitions/main_options" + "$ref": "#/$defs/umi_processing" }, { - "$ref": "#/definitions/trim_split_fastq" + "$ref": "#/$defs/preprocessing" }, { - "$ref": "#/definitions/preprocessing" + "$ref": "#/$defs/variant_calling" }, { - "$ref": "#/definitions/variant_calling" + "$ref": "#/$defs/post_variant_calling" }, { - "$ref": "#/definitions/annotation" + "$ref": "#/$defs/annotation" }, { - "$ref": "#/definitions/reference_genome_options" + "$ref": "#/$defs/general_reference_genome_options" }, { - "$ref": "#/definitions/institutional_config_options" + "$ref": "#/$defs/reference_genome_options" }, { - "$ref": "#/definitions/max_job_request_options" + "$ref": "#/$defs/institutional_config_options" }, { - "$ref": "#/definitions/generic_options" + "$ref": "#/$defs/generic_options" } ] -} \ No newline at end of file +} diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000000..0676bf9351 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,26 @@ +config { + // location for all nf-test tests + testsDir "." 
+ + // nf-test directory including temporary files for each test + workDir System.getenv("NFT_WORKDIR") ?: ".nf-test" + + // location of an optional nextflow.config file specific for executing tests + configFile "tests/nextflow.config" + + // ignore tests coming from the nf-core/modules repo + ignore 'modules/nf-core/**/tests/*', 'subworkflows/nf-core/**/tests/*' + + // run all tests with defined profile(s) from the main nextflow.config + profile "test" + + // list of filenames or patterns that should trigger a full test run + triggers 'assets/schema_input.json', 'conf/test.config', 'nextflow.config', 'nextflow_schema.json', 'nf-test.config', 'tests/.nftignore', 'tests/nextflow.config' + + // load the necessary plugins + plugins { + load "nft-bam@0.6.1" + load "nft-utils@0.0.9" + load "nft-vcf@1.0.7" + } +} diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json new file mode 100644 index 0000000000..8a9dca4b41 --- /dev/null +++ b/ro-crate-metadata.json @@ -0,0 +1,582 @@ +{ + "@context": [ + "https://w3id.org/ro/crate/1.1/context", + { + "GithubService": "https://w3id.org/ro/terms/test#GithubService", + "JenkinsService": "https://w3id.org/ro/terms/test#JenkinsService", + "PlanemoEngine": "https://w3id.org/ro/terms/test#PlanemoEngine", + "TestDefinition": "https://w3id.org/ro/terms/test#TestDefinition", + "TestInstance": "https://w3id.org/ro/terms/test#TestInstance", + "TestService": "https://w3id.org/ro/terms/test#TestService", + "TestSuite": "https://w3id.org/ro/terms/test#TestSuite", + "TravisService": "https://w3id.org/ro/terms/test#TravisService", + "definition": "https://w3id.org/ro/terms/test#definition", + "engineVersion": "https://w3id.org/ro/terms/test#engineVersion", + "instance": "https://w3id.org/ro/terms/test#instance", + "resource": "https://w3id.org/ro/terms/test#resource", + "runsOn": "https://w3id.org/ro/terms/test#runsOn" + } + ], + "@graph": [ + { + "@id": "./", + "@type": "Dataset", + "creativeWorkStatus": "InProgress", + "datePublished": 
"2026-02-12T09:44:51+00:00", + "description": "

\n \n \n \"nf-core/sarek\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/sarek)\n[![GitHub Actions CI Status](https://github.com/nf-core/sarek/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/sarek/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/sarek/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/sarek/actions/workflows/linting.yml)\n[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/sarek/results)\n[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3476425-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3476425)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/sarek)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23sarek-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/sarek)\n[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)\n[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)\n[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/sarek** is a workflow designed to detect variants on whole genome or targeted sequencing data. Initially designed for Human, and Mouse, it can work on any species with a reference genome. Sarek can also handle tumour / normal pairs and could include additional relapses.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. 
The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/sarek/results).\n\nIt's listed on [Elixir - Tools and Data Services Registry](https://bio.tools/nf-core-sarek) and [Dockstore](https://dockstore.org/workflows/github.com/nf-core/sarek).\n\n

\n \n

\n\n## Pipeline summary\n\nDepending on the options and samples provided, the pipeline can currently perform the following:\n\n- Form consensus reads from UMI sequences (`fgbio`)\n- Sequencing quality control and trimming (enabled by `--trim_fastq`) (`FastQC`, `fastp`)\n- Contamination removal (`BBSplit`, enabled by `--tools bbsplit`)\n- Map Reads to Reference (`BWA-mem`, `BWA-mem2`, `dragmap` or `Sentieon BWA-mem`)\n- Process BAM file (`GATK MarkDuplicates`, `GATK BaseRecalibrator` and `GATK ApplyBQSR` or `Sentieon LocusCollector` and `Sentieon Dedup`)\n- _Experimental Feature_: Use GPU-accelerated parabricks implementation as alternative to \"Map Reads to Reference\" + \"Process BAM file\" (`--aligner parabricks`)\n- Summarise alignment statistics (`samtools stats`, `mosdepth`)\n- Variant calling (enabled by `--tools`, see [compatibility](https://nf-co.re/sarek/latest/docs/usage#which-variant-calling-tool-is-implemented-for-which-data-type)):\n - `ASCAT`\n - `CNVkit`\n - `Control-FREEC`\n - `DeepVariant`\n - `freebayes`\n - `GATK HaplotypeCaller`\n - `GATK Mutect2`\n - `indexcov`\n - `Lofreq`\n - `Manta`\n - `mpileup`\n - `MSIsensor2`\n - `MSIsensor-pro`\n - `MuSE`\n - `Sentieon Haplotyper`\n - `Strelka`\n - `TIDDIT`\n- Post-variant calling options, one of:\n - Filtering (`bcftools view` (default: filter by `PASS,.`)), normalisation (`bcftools norm`) and consensus calling (`bcftools isec`, default: called by at least 2 tools `-n+2`) on all vcfs and/or `bcftools concat` for germline vcfs\n - `Varlociraptor` for all vcfs\n- Variant filtering and annotation (`SnpEff`, `Ensembl VEP`, `BCFtools annotate`, `SnpSift`)\n- Summarise and represent QC (`MultiQC`)\n\n

\n \n

\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\npatient,sample,lane,fastq_1,fastq_2\nID1,S1,L002,ID1_S1_L002_R1_001.fastq.gz,ID1_S1_L002_R2_001.fastq.gz\n```\n\nEach row represents a pair of fastq files (paired end).\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/sarek \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/sarek/usage) and the [parameter documentation](https://nf-co.re/sarek/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/sarek/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/sarek/output).\n\n## Benchmarking\n\nOn each release, the pipeline is run on 3 full size tests:\n\n- `test_full` runs tumor-normal data for one patient from the SEQ2C consortium\n- `test_full_germline` runs a WGS 30X Genome-in-a-Bottle(NA12878) dataset\n- `test_full_germline_ncbench_agilent` runs two WES samples with 75M and 200M reads (data available 
[here](https://github.com/ncbench/ncbench-workflow#contributing-callsets)). The results are uploaded to Zenodo, evaluated against a truth dataset, and results are made available via the [NCBench dashboard](https://ncbench.github.io/report/report.html#).\n\n## Credits\n\nSarek was originally written by Maxime U Garcia and Szilveszter Juhos at the [National Genomics Infrastructure](https://ngisweden.scilifelab.se) and [National Bioinformatics Infrastructure Sweden](https://nbis.se) which are both platforms at [SciLifeLab](https://scilifelab.se), with the support of [The Swedish Childhood Tumor Biobank (Barntum\u00f6rbanken)](https://ki.se/forskning/barntumorbanken).\nFriederike Hanssen and Gisela Gabernet at [QBiC](https://www.qbic.uni-tuebingen.de/) later joined and helped with further development.\n\nThe Nextflow DSL2 conversion of the pipeline was led by Friederike Hanssen and Maxime U Garcia.\n\nMaintenance is now led by Friederike Hanssen and Maxime U Garcia (now at [Seqera](https://seqera.io))\n\nMain developers:\n\n- [Maxime U Garcia](https://github.com/maxulysse)\n- [Friederike Hanssen](https://github.com/FriederikeHanssen)\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Abhinav Sharma](https://github.com/abhi18av)\n- [Adam Talbot](https://github.com/adamrtalbot)\n- [Adrian L\u00e4rkeryd](https://github.com/adrlar)\n- [\u00c0itor Olivares](https://github.com/AitorPeseta)\n- [Alexander Peltzer](https://github.com/apeltzer)\n- [Alison Meynert](https://github.com/ameynert)\n- [Anders Sune Pedersen](https://github.com/asp8200)\n- [arontommi](https://github.com/arontommi)\n- [BarryDigby](https://github.com/BarryDigby)\n- [Bekir Erg\u00fcner](https://github.com/berguner)\n- [bjornnystedt](https://github.com/bjornnystedt)\n- [cgpu](https://github.com/cgpu)\n- [Chela James](https://github.com/chelauk)\n- [David Mas-Ponte](https://github.com/davidmasp)\n- [Edmund Miller](https://github.com/edmundmiller)\n- 
[Famke B\u00e4uerle](https://github.com/famosab)\n- [Francesco Lescai](https://github.com/lescai)\n- [Francisco Mart\u00ednez](https://github.com/nevinwu)\n- [Gavin Mackenzie](https://github.com/GCJMackenzie)\n- [Gisela Gabernet](https://github.com/ggabernet)\n- [Grant Neilson](https://github.com/grantn5)\n- [gulfshores](https://github.com/gulfshores)\n- [Harshil Patel](https://github.com/drpatelh)\n- [Hongwei Ye](https://github.com/YeHW)\n- [James A. Fellows Yates](https://github.com/jfy133)\n- [Jesper Eisfeldt](https://github.com/J35P312)\n- [Johannes Alneberg](https://github.com/alneberg)\n- [Jonas Kjellin](https://github.com/kjellinjonas)\n- [Jos\u00e9 Fern\u00e1ndez Navarro](https://github.com/jfnavarro)\n- [J\u00falia Mir Pedrol](https://github.com/mirpedrol)\n- [Ken Brewer](https://github.com/kenibrewer)\n- [Lasse Westergaard Folkersen](https://github.com/lassefolkersen)\n- [Lucia Conde](https://github.com/lconde-ucl)\n- [Louis Le N\u00e9zet](https://github.com/LouisLeNezet)\n- [Malin Larsson](https://github.com/malinlarsson)\n- [Marcel Martin](https://github.com/marcelm)\n- [Nick Smith](https://github.com/nickhsmith)\n- [Nicolas Schcolnicov](https://github.com/nschcolnicov)\n- [Nilesh Tawari](https://github.com/nilesh-tawari)\n- [Nils Homer](https://github.com/nh13)\n- [Olga Botvinnik](https://github.com/olgabot)\n- [Oskar Wacker](https://github.com/WackerO)\n- [pallolason](https://github.com/pallolason)\n- [Paul Cantalupo](https://github.com/pcantalupo)\n- [Phil Ewels](https://github.com/ewels)\n- [Pierre Lindenbaum](https://github.com/lindenb)\n- [Sabrina Krakau](https://github.com/skrakau)\n- [Sam Minot](https://github.com/sminot)\n- [Sebastian-D](https://github.com/Sebastian-D)\n- [Silvia Morini](https://github.com/silviamorins)\n- [Simon Pearce](https://github.com/SPPearce)\n- [Solenne Correard](https://github.com/scorreard)\n- [Susanne Jodoin](https://github.com/SusiJo)\n- [Szilveszter Juhos](https://github.com/szilvajuhos)\n- [Tobias 
Koch](https://github.com/KochTobi)\n- [Winni Kretzschmar](https://github.com/winni2k)\n- [Patricie Skal\u00e1kov\u00e1](https://github.com/Patricie34)\n\n## Acknowledgements\n\n| [![Barntum\u00f6rbanken](docs/images/BTB_logo.png)](https://ki.se/forskning/barntumorbanken) | [![SciLifeLab](docs/images/SciLifeLab_logo.png)](https://scilifelab.se) |\n| :-----------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------: |\n| [![National Genomics Infrastructure](docs/images/NGI_logo.png)](https://ngisweden.scilifelab.se/) | [![National Bioinformatics Infrastructure Sweden](docs/images/NBIS_logo.png)](https://nbis.se) |\n| [![QBiC](docs/images/QBiC_logo.png)](https://www.qbic.uni-tuebingen.de) | [![GHGA](docs/images/GHGA_logo.png)](https://www.ghga.de/) |\n| [![DNGC](docs/images/DNGC_logo.png)](https://eng.ngc.dk/) | |\n\n## Contributions & Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#sarek` channel](https://nfcore.slack.com/channels/sarek) (you can join with [this invite](https://nf-co.re/join/slack)), or contact us: [Maxime U Garcia](mailto:maxime.garcia@seqera.io?subject=[GitHub]%20nf-core/sarek), [Friederike Hanssen](mailto:friederike.hanssen@qbic.uni-tuebingen.de?subject=[GitHub]%20nf-core/sarek)\n\n## Citations\n\nIf you use `nf-core/sarek` for your analysis, please cite the `Sarek` article as follows:\n\n> Friederike Hanssen, Maxime U Garcia, Lasse Folkersen, Anders Sune Pedersen, Francesco Lescai, Susanne Jodoin, Edmund Miller, Oskar Wacker, Nicholas Smith, nf-core community, Gisela Gabernet, Sven Nahnsen **Scalable and efficient DNA sequencing analysis on different compute infrastructures aiding variant discovery** _NAR Genomics and Bioinformatics_ Volume 6, Issue 2, June 
2024, lqae031, [doi: 10.1093/nargab/lqae031](https://doi.org/10.1093/nargab/lqae031).\n\n> Garcia M, Juhos S, Larsson M et al. **Sarek: A portable workflow for whole-genome sequencing analysis of germline and somatic variants [version 2; peer review: 2 approved]** _F1000Research_ 2020, 9:63 [doi: 10.12688/f1000research.16665.2](http://dx.doi.org/10.12688/f1000research.16665.2).\n\nYou can cite the sarek zenodo record for a specific version using the following [doi: 10.5281/zenodo.3476425](https://doi.org/10.5281/zenodo.3476425)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n\n## CHANGELOG\n\n- [CHANGELOG](CHANGELOG.md)\n", + "hasPart": [ + { + "@id": "main.nf" + }, + { + "@id": "assets/" + }, + { + "@id": "bin/" + }, + { + "@id": "conf/" + }, + { + "@id": "docs/" + }, + { + "@id": "docs/images/" + }, + { + "@id": "modules/" + }, + { + "@id": "modules/local/" + }, + { + "@id": "modules/nf-core/" + }, + { + "@id": "workflows/" + }, + { + "@id": "subworkflows/" + }, + { + "@id": "nextflow.config" + }, + { + "@id": "README.md" + }, + { + "@id": "nextflow_schema.json" + }, + { + "@id": "CHANGELOG.md" + }, + { + "@id": "LICENSE" + }, + { + "@id": "CODE_OF_CONDUCT.md" + }, + { + "@id": "CITATIONS.md" + }, + { + "@id": "modules.json" + }, + { + "@id": "docs/usage.md" + }, + { + "@id": "docs/output.md" + }, + { + "@id": ".nf-core.yml" + }, + { + "@id": ".pre-commit-config.yaml" + }, + { + "@id": ".prettierignore" + } + ], + "isBasedOn": "https://github.com/nf-core/sarek", + "license": "MIT", + 
"mainEntity": { + "@id": "main.nf" + }, + "mentions": [ + { + "@id": "#5f864abe-a377-4d7c-a6d4-9bb60b61c064" + } + ], + "name": "nf-core/sarek" + }, + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": [ + { + "@id": "https://w3id.org/ro/crate/1.1" + }, + { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + } + ] + }, + { + "@id": "main.nf", + "@type": [ + "File", + "SoftwareSourceCode", + "ComputationalWorkflow" + ], + "creator": [ + { + "@id": "#max.u.garcia@gmail.com" + }, + { + "@id": "https://orcid.org/0000-0003-1387-0251" + }, + { + "@id": "#24893913+SPPearce@users.noreply.github.com" + }, + { + "@id": "https://orcid.org/0009-0007-7860-1155" + }, + { + "@id": "#adr.lar@me.com" + }, + { + "@id": "#53608000+lescai@users.noreply.github.com" + }, + { + "@id": "#jc.fernandez.navarro@gmail.com" + }, + { + "@id": "https://orcid.org/0000-0001-6104-9260" + }, + { + "@id": "#malin.larsson@liu.se" + }, + { + "@id": "https://orcid.org/0000-0001-7409-305X" + }, + { + "@id": "https://orcid.org/0000-0003-3996-0909" + }, + { + "@id": "https://orcid.org/0000-0002-5762-6253" + }, + { + "@id": "https://orcid.org/0000-0003-3966-8481" + }, + { + "@id": "#yehwhey@gmail.com" + }, + { + "@id": "https://orcid.org/0000-0002-6503-2180" + }, + { + "@id": "https://orcid.org/0000-0003-3996-0909" + }, + { + "@id": "https://orcid.org/0009-0001-9875-5262" + }, + { + "@id": "#heuermh@acm.org" + }, + { + "@id": "https://orcid.org/0000-0003-0603-7907" + }, + { + "@id": "https://orcid.org/0009-0006-2111-4316" + }, + { + "@id": "https://orcid.org/0000-0001-6280-4643" + }, + { + "@id": "#max.u.garcia@gmail.com" + }, + { + "@id": "#l.conde@ucl.ac.uk" + } + ], + "dateCreated": "", + "dateModified": "2026-02-12T10:44:51Z", + "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", + "keywords": [ + "nf-core", + "nextflow", + "annotation", + "cancer", + "gatk4", + "genomics", + "germline", + 
"pre-processing", + "somatic", + "target-panels", + "variant-calling", + "whole-exome-sequencing", + "whole-genome-sequencing" + ], + "license": [ + "MIT" + ], + "maintainer": [ + { + "@id": "#max.u.garcia@gmail.com" + }, + { + "@id": "https://orcid.org/0000-0003-1387-0251" + }, + { + "@id": "#24893913+SPPearce@users.noreply.github.com" + }, + { + "@id": "https://orcid.org/0009-0007-7860-1155" + }, + { + "@id": "#jc.fernandez.navarro@gmail.com" + }, + { + "@id": "https://orcid.org/0000-0001-6104-9260" + }, + { + "@id": "#malin.larsson@liu.se" + }, + { + "@id": "https://orcid.org/0000-0001-7409-305X" + }, + { + "@id": "https://orcid.org/0000-0003-3966-8481" + }, + { + "@id": "#yehwhey@gmail.com" + }, + { + "@id": "https://orcid.org/0000-0002-6503-2180" + }, + { + "@id": "https://orcid.org/0000-0003-3996-0909" + }, + { + "@id": "https://orcid.org/0000-0003-0603-7907" + }, + { + "@id": "https://orcid.org/0000-0001-6280-4643" + }, + { + "@id": "#l.conde@ucl.ac.uk" + } + ], + "name": [ + "nf-core/sarek" + ], + "programmingLanguage": { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" + }, + "sdPublisher": { + "@id": "https://nf-co.re/" + }, + "url": [ + "https://github.com/nf-core/sarek", + "https://nf-co.re/sarek/dev/" + ], + "version": [ + "3.9.0dev" + ] + }, + { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", + "@type": "ComputerLanguage", + "identifier": { + "@id": "https://www.nextflow.io/" + }, + "name": "Nextflow", + "url": { + "@id": "https://www.nextflow.io/" + }, + "version": "!>=25.10.2" + }, + { + "@id": "#5f864abe-a377-4d7c-a6d4-9bb60b61c064", + "@type": "TestSuite", + "instance": [ + { + "@id": "#8dc801d6-82f6-4ce4-aff5-5ba50ff3ee06" + } + ], + "mainEntity": { + "@id": "main.nf" + }, + "name": "Test suite for nf-core/sarek" + }, + { + "@id": "#8dc801d6-82f6-4ce4-aff5-5ba50ff3ee06", + "@type": "TestInstance", + "name": "GitHub Actions workflow for testing nf-core/sarek", + "resource": 
"repos/nf-core/sarek/actions/workflows/nf-test.yml", + "runsOn": { + "@id": "https://w3id.org/ro/terms/test#GithubService" + }, + "url": "https://api.github.com" + }, + { + "@id": "https://w3id.org/ro/terms/test#GithubService", + "@type": "TestService", + "name": "Github Actions", + "url": { + "@id": "https://github.com" + } + }, + { + "@id": "assets/", + "@type": "Dataset", + "description": "Additional files" + }, + { + "@id": "bin/", + "@type": "Dataset", + "description": "Scripts that must be callable from a pipeline process" + }, + { + "@id": "conf/", + "@type": "Dataset", + "description": "Configuration files" + }, + { + "@id": "docs/", + "@type": "Dataset", + "description": "Markdown files for documenting the pipeline" + }, + { + "@id": "docs/images/", + "@type": "Dataset", + "description": "Images for the documentation files" + }, + { + "@id": "modules/", + "@type": "Dataset", + "description": "Modules used by the pipeline" + }, + { + "@id": "modules/local/", + "@type": "Dataset", + "description": "Pipeline-specific modules" + }, + { + "@id": "modules/nf-core/", + "@type": "Dataset", + "description": "nf-core modules" + }, + { + "@id": "workflows/", + "@type": "Dataset", + "description": "Main pipeline workflows to be executed in main.nf" + }, + { + "@id": "subworkflows/", + "@type": "Dataset", + "description": "Smaller subworkflows" + }, + { + "@id": "nextflow.config", + "@type": "File", + "description": "Main Nextflow configuration file" + }, + { + "@id": "README.md", + "@type": "File", + "description": "Basic pipeline usage information" + }, + { + "@id": "nextflow_schema.json", + "@type": "File", + "description": "JSON schema for pipeline parameter specification" + }, + { + "@id": "CHANGELOG.md", + "@type": "File", + "description": "Information on changes made to the pipeline" + }, + { + "@id": "LICENSE", + "@type": "File", + "description": "The license - should be MIT" + }, + { + "@id": "CODE_OF_CONDUCT.md", + "@type": "File", + "description": "The 
nf-core code of conduct" + }, + { + "@id": "CITATIONS.md", + "@type": "File", + "description": "Citations needed when using the pipeline" + }, + { + "@id": "modules.json", + "@type": "File", + "description": "Version information for modules from nf-core/modules" + }, + { + "@id": "docs/usage.md", + "@type": "File", + "description": "Usage documentation" + }, + { + "@id": "docs/output.md", + "@type": "File", + "description": "Output documentation" + }, + { + "@id": ".nf-core.yml", + "@type": "File", + "description": "nf-core configuration file, configuring template features and linting rules" + }, + { + "@id": ".pre-commit-config.yaml", + "@type": "File", + "description": "Configuration file for pre-commit hooks" + }, + { + "@id": ".prettierignore", + "@type": "File", + "description": "Ignore file for prettier" + }, + { + "@id": "https://nf-co.re/", + "@type": "Organization", + "name": "nf-core", + "url": "https://nf-co.re/" + }, + { + "@id": "#max.u.garcia@gmail.com", + "@type": "Person", + "email": "max.u.garcia@gmail.com", + "name": "Maxime Garcia" + }, + { + "@id": "https://orcid.org/0000-0003-1387-0251", + "@type": "Person", + "email": "45968370+famosab@users.noreply.github.com", + "name": "Famke B\u00e4uerle" + }, + { + "@id": "#24893913+SPPearce@users.noreply.github.com", + "@type": "Person", + "email": "24893913+SPPearce@users.noreply.github.com", + "name": "Simon Pearce" + }, + { + "@id": "https://orcid.org/0009-0007-7860-1155", + "@type": "Person", + "email": "nh13@users.noreply.github.com", + "name": "Nils Homer" + }, + { + "@id": "#adr.lar@me.com", + "@type": "Person", + "email": "adr.lar@me.com", + "name": "Adrian Larkeryd" + }, + { + "@id": "#53608000+lescai@users.noreply.github.com", + "@type": "Person", + "email": "53608000+lescai@users.noreply.github.com", + "name": "Francesco L" + }, + { + "@id": "#jc.fernandez.navarro@gmail.com", + "@type": "Person", + "email": "jc.fernandez.navarro@gmail.com", + "name": "Jos\u00e9 Fern\u00e1ndez Navarro" + }, + { 
+ "@id": "https://orcid.org/0000-0001-6104-9260", + "@type": "Person", + "email": "mirp.julia@gmail.com", + "name": "J\u00falia Mir Pedrol" + }, + { + "@id": "#malin.larsson@liu.se", + "@type": "Person", + "email": "malin.larsson@liu.se", + "name": "Malin Larsson" + }, + { + "@id": "https://orcid.org/0000-0001-7409-305X", + "@type": "Person", + "email": "david.mas.p@gmail.com", + "name": "David Mas-Ponte" + }, + { + "@id": "https://orcid.org/0000-0003-3996-0909", + "@type": "Person", + "email": "chela.james@icr.ac.uk", + "name": "Chela James" + }, + { + "@id": "https://orcid.org/0000-0002-5762-6253", + "@type": "Person", + "email": "bounlu@gmail.com", + "name": "\u00d6mer An" + }, + { + "@id": "https://orcid.org/0000-0003-3966-8481", + "@type": "Person", + "email": "pcantalupo@gmail.com", + "name": "Paul Cantalupo" + }, + { + "@id": "#yehwhey@gmail.com", + "@type": "Person", + "email": "yehwhey@gmail.com", + "name": "Hongwei Ye" + }, + { + "@id": "https://orcid.org/0000-0002-6503-2180", + "@type": "Person", + "email": "apeltzer@users.noreply.github.com", + "name": "Alexander Peltzer" + }, + { + "@id": "https://orcid.org/0009-0001-9875-5262", + "@type": "Person", + "email": "friederike.hanssen@seqera.io", + "name": "Friederike Hanssen" + }, + { + "@id": "#heuermh@acm.org", + "@type": "Person", + "email": "heuermh@acm.org", + "name": "Michael L Heuer" + }, + { + "@id": "https://orcid.org/0000-0003-0603-7907", + "@type": "Person", + "email": "sabrina.krakau.qbic@gmail.com", + "name": "Sabrina Krakau" + }, + { + "@id": "https://orcid.org/0009-0006-2111-4316", + "@type": "Person", + "email": "smith@in.tum.de", + "name": "Smith Nicholas" + }, + { + "@id": "https://orcid.org/0000-0001-6280-4643", + "@type": "Person", + "email": "szilveszter.juhos@scilifelab.se", + "name": "Szilveszter Juhos" + }, + { + "@id": "#l.conde@ucl.ac.uk", + "@type": "Person", + "email": "l.conde@ucl.ac.uk", + "name": "Lucia Conde" + } + ] +} \ No newline at end of file diff --git 
a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf deleted file mode 100644 index e6e8898a78..0000000000 --- a/subworkflows/local/annotate.nf +++ /dev/null @@ -1,54 +0,0 @@ -// -// ANNOTATION -// - -include { ANNOTATION_SNPEFF } from '../nf-core/annotation_snpeff/main' -include { ANNOTATION_ENSEMBLVEP as MERGE_ANNOTATE } from '../nf-core/annotation_ensemblvep/main' -include { ANNOTATION_ENSEMBLVEP } from '../nf-core/annotation_ensemblvep/main' - -workflow ANNOTATE { - take: - vcf // channel: [ val(meta), vcf ] - tools - snpeff_db - snpeff_cache - vep_genome - vep_species - vep_cache_version - vep_cache - - main: - ch_reports = Channel.empty() - ch_vcf_ann = Channel.empty() - ch_versions = Channel.empty() - - if (tools.contains('merge') || tools.contains('snpeff')) { - ANNOTATION_SNPEFF(vcf, snpeff_db, snpeff_cache) - - ch_reports = ch_reports.mix(ANNOTATION_SNPEFF.out.reports) - ch_vcf_ann = ch_vcf_ann.mix(ANNOTATION_SNPEFF.out.vcf_tbi) - ch_versions = ch_versions.mix(ANNOTATION_SNPEFF.out.versions.first()) - } - - if (tools.contains('merge')) { - vcf_ann_for_merge = ANNOTATION_SNPEFF.out.vcf_tbi.map{ meta, vcf, tbi -> [meta, vcf] } - MERGE_ANNOTATE(vcf_ann_for_merge, vep_genome, vep_species, vep_cache_version, vep_cache) - - ch_reports = ch_reports.mix(MERGE_ANNOTATE.out.reports) - ch_vcf_ann = ch_vcf_ann.mix(MERGE_ANNOTATE.out.vcf_tbi) - ch_versions = ch_versions.mix(MERGE_ANNOTATE.out.versions.first()) - } - - if (tools.contains('vep')) { - ANNOTATION_ENSEMBLVEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache) - - ch_reports = ch_reports.mix(ANNOTATION_ENSEMBLVEP.out.reports) - ch_vcf_ann = ch_vcf_ann.mix(ANNOTATION_ENSEMBLVEP.out.vcf_tbi) - ch_versions = ch_versions.mix(ANNOTATION_ENSEMBLVEP.out.versions.first()) - } - - emit: - vcf_ann = ch_vcf_ann // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] - reports = ch_reports // path: *.html - versions = ch_versions // path: versions.yml -} diff --git 
a/subworkflows/local/annotation_cache_initialisation/main.nf b/subworkflows/local/annotation_cache_initialisation/main.nf new file mode 100644 index 0000000000..1bc86a27c1 --- /dev/null +++ b/subworkflows/local/annotation_cache_initialisation/main.nf @@ -0,0 +1,72 @@ +// +// ANNOTATION CACHE INITIALISATION +// + +// Initialise channels based on params or indices that were just built +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run +// Condition is based on params.step and params.tools +// If an extra condition exists, it's specified in comments + +workflow ANNOTATION_CACHE_INITIALISATION { + take: + snpeff_enabled + snpeff_cache + snpeff_db + vep_enabled + vep_cache + vep_species + vep_cache_version + vep_genome + vep_custom_args + help_message + + main: + if (snpeff_enabled) { + def snpeff_annotation_cache_key = isCloudUrl(snpeff_cache) ? "${snpeff_db}/" : "" + def snpeff_cache_dir = "${snpeff_annotation_cache_key}${snpeff_db}" + def snpeff_cache_path_full = file("${snpeff_cache}/${snpeff_cache_dir}", type: 'dir') + if (!snpeff_cache_path_full.exists() || !snpeff_cache_path_full.isDirectory()) { + if (snpeff_cache == "s3://annotation-cache/snpeff_cache/") { + error("This path is not available within annotation-cache.\nPlease check https://annotation-cache.github.io/ to create a request for it.") + } + else { + error("Path provided with SnpEff cache is invalid.\nMake sure there is a directory named ${snpeff_cache_dir} in ${snpeff_cache}.\n${help_message}") + } + } + snpeff_cache = Channel.fromPath(file("${snpeff_cache}/${snpeff_annotation_cache_key}"), checkIfExists: true) + .collect() + .map { cache -> [[id: "${snpeff_db}"], cache] } + } + else { + snpeff_cache = [] + } + + if (vep_enabled) { + def vep_annotation_cache_key = isCloudUrl(vep_cache) ? "${vep_cache_version}_${vep_genome}/" : "" + def vep_species_suffix = vep_custom_args.contains("--merged") ? 
'_merged' : (vep_custom_args.contains("--refseq") ? '_refseq' : '') + def vep_cache_dir = "${vep_annotation_cache_key}${vep_species}${vep_species_suffix}/${vep_cache_version}_${vep_genome}" + def vep_cache_path_full = file("${vep_cache}/${vep_cache_dir}", type: 'dir') + if (!vep_cache_path_full.exists() || !vep_cache_path_full.isDirectory()) { + if (vep_cache == "s3://annotation-cache/vep_cache/") { + error("This path is not available within annotation-cache.\nPlease check https://annotation-cache.github.io/ to create a request for it.") + } + else { + error("Path provided with VEP cache is invalid.\nMake sure there is a directory named ${vep_cache_dir} in ${vep_cache}.\n${help_message}") + } + } + ensemblvep_cache = Channel.fromPath(file("${vep_cache}/${vep_annotation_cache_key}"), checkIfExists: true).collect() + } + else { + ensemblvep_cache = [] + } + + emit: + ensemblvep_cache // channel: [ meta, cache ] + snpeff_cache // channel: [ meta, cache ] +} + +// Helper function to check if cache path is from any cloud provider +def isCloudUrl(cache_url) { + return cache_url.startsWith("s3://") || cache_url.startsWith("gs://") || cache_url.startsWith("az://") +} diff --git a/subworkflows/local/bam2fastq.nf b/subworkflows/local/bam2fastq.nf deleted file mode 100644 index 2956c2ab93..0000000000 --- a/subworkflows/local/bam2fastq.nf +++ /dev/null @@ -1,88 +0,0 @@ -// -// BAM/CRAM to FASTQ conversion, paired end only -// - - -//include { FASTQC } from '../../modules/nf-core/modules/fastqc/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main' -include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_MAP_MAP } from '../../modules/nf-core/modules/samtools/view/main' -include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_UNMAP_UNMAP } from '../../modules/nf-core/modules/samtools/view/main' -include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_UNMAP_MAP } from '../../modules/nf-core/modules/samtools/view/main' -include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_MAP_UNMAP } from 
'../../modules/nf-core/modules/samtools/view/main' -include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_UNMAPPED } from '../../modules/nf-core/modules/samtools/merge/main' -include { SAMTOOLS_FASTQ as SAMTOOLS_FASTQ_UNMAPPED } from '../../modules/local/samtools/fastq/main' -include { SAMTOOLS_FASTQ as SAMTOOLS_FASTQ_MAPPED } from '../../modules/local/samtools/fastq/main' -include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main' - -workflow ALIGNMENT_TO_FASTQ { - take: - input // channel: [meta, alignment (BAM or CRAM), index (optional)] - fasta // optional: reference file if CRAM format and reference not in header - - - main: - ch_versions = Channel.empty() - //Index File if not PROVIDED -> this also requires updates to samtools view possibly URGH - - //QC input BAM? -> needs another FASTQC module implementation - - //MAP - MAP - SAMTOOLS_VIEW_MAP_MAP(input, fasta) - ch_versions = ch_versions.mix(SAMTOOLS_VIEW_MAP_MAP.out.versions) - - // UNMAP - UNMAP - SAMTOOLS_VIEW_UNMAP_UNMAP(input, fasta) - ch_versions = ch_versions.mix(SAMTOOLS_VIEW_UNMAP_UNMAP.out.versions) - - // UNMAP - MAP - SAMTOOLS_VIEW_UNMAP_MAP(input, fasta) - ch_versions = ch_versions.mix(SAMTOOLS_VIEW_UNMAP_MAP.out.versions) - - //MAP - UNMAP - SAMTOOLS_VIEW_MAP_UNMAP(input, fasta) - ch_versions = ch_versions.mix(SAMTOOLS_VIEW_MAP_UNMAP.out.versions) - - // Merge UNMAP - SAMTOOLS_VIEW_UNMAP_UNMAP.out.bam.join(SAMTOOLS_VIEW_UNMAP_MAP.out.bam, remainder: true) - .join(SAMTOOLS_VIEW_MAP_UNMAP.out.bam, remainder: true) - .map{ meta, unmap_unmap, unmap_map, map_unmap -> - [meta, [unmap_unmap, unmap_map, map_unmap]] - }.set{ all_unmapped_bam } - - SAMTOOLS_MERGE_UNMAPPED(all_unmapped_bam, fasta) - ch_versions = ch_versions.mix(SAMTOOLS_MERGE_UNMAPPED.out.versions) - - // Collate & convert unmapped - SAMTOOLS_FASTQ_UNMAPPED(SAMTOOLS_MERGE_UNMAPPED.out.bam) - ch_versions = ch_versions.mix(SAMTOOLS_FASTQ_UNMAPPED.out.versions) - - // Collate & convert mapped - 
SAMTOOLS_FASTQ_MAPPED(SAMTOOLS_VIEW_MAP_MAP.out.bam) - ch_versions = ch_versions.mix(SAMTOOLS_FASTQ_MAPPED.out.versions) - - // join Mapped & unmapped fastq - SAMTOOLS_FASTQ_UNMAPPED.out.reads.map{ meta, reads -> - fq_1 = reads.findAll{ it.toString().endsWith("_1.fq.gz") }.get(0) - fq_2 = reads.findAll{ it.toString().endsWith("_2.fq.gz") }.get(0) - [meta, [ fq_1, fq_2]] - }.set{unmapped_reads} - - SAMTOOLS_FASTQ_MAPPED.out.reads.map{ meta, reads -> - fq_1 = reads.findAll{ it.toString().endsWith("_1.fq.gz") }.get(0) - fq_2 = reads.findAll{ it.toString().endsWith("_2.fq.gz") }.get(0) - [meta, [ fq_1, fq_2]] - }.set{mapped_reads} - - mapped_reads.join(unmapped_reads).map{ meta, mapped_reads, unmapped_reads -> - [meta, [mapped_reads[0], mapped_reads[1], unmapped_reads[0], unmapped_reads[1]]] - }.set{ reads_to_concat } - - // Concatenate Mapped_R1 with Unmapped_R1 and Mapped_R2 with Unmapped_R2 - CAT_FASTQ(reads_to_concat) - ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) - - emit: - reads = CAT_FASTQ.out.reads - versions = ch_versions - -} diff --git a/subworkflows/local/bam_applybqsr/main.nf b/subworkflows/local/bam_applybqsr/main.nf new file mode 100644 index 0000000000..fec222db90 --- /dev/null +++ b/subworkflows/local/bam_applybqsr/main.nf @@ -0,0 +1,88 @@ +// +// RECALIBRATE +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { BAM_MERGE_INDEX_SAMTOOLS } from '../bam_merge_index_samtools' +include { CRAM_MERGE_INDEX_SAMTOOLS } from '../cram_merge_index_samtools' +include { GATK4_APPLYBQSR } from '../../../modules/nf-core/gatk4/applybqsr' + +workflow BAM_APPLYBQSR { + take: + cram // channel: [mandatory] [ meta, cram, crai, recal ] + dict // channel: [mandatory] [ meta, dict ] + fasta // channel: [mandatory] [ meta, fasta ] + fasta_fai // channel: [mandatory] [ meta, fasta_fai ] + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no 
intervals + + main: + versions = channel.empty() + bam_applybqsr_single = channel.empty() + bam_to_merge = channel.empty() + + // Combine cram and intervals for spread and gather strategy + // Move num_intervals to meta map + cram_intervals = cram + .combine(intervals) + .map { meta, cram_, crai, recal, intervals_, num_intervals -> [meta + [num_intervals: num_intervals], cram_, crai, recal, intervals_] } + + // RUN APPLYBQSR + GATK4_APPLYBQSR( + cram_intervals, + fasta.map { _meta, fasta_ -> [fasta_] }, + fasta_fai.map { _meta, fasta_fai_ -> [fasta_fai_] }, + dict.map { _meta, dict_ -> [dict_] }, + ) + + // FOR BAMs + if (params.save_output_as_bam) { + + bam_applybqsr_out = GATK4_APPLYBQSR.out.bam + .join(GATK4_APPLYBQSR.out.bai, failOnDuplicate: true, failOnMismatch: true) + .branch { files -> + single: files[0].num_intervals == 1 + multiple: files[0].num_intervals > 1 + } + + bam_applybqsr_single = bam_applybqsr_out.single + + // For multiple intervals, gather and merge the recalibrated bam files + bam_to_merge = bam_applybqsr_out.multiple + .map { meta, bam_, _bai -> [groupKey(meta, meta.num_intervals), bam_] } + .groupTuple() + } + + // Merge and index the recalibrated bam files + BAM_MERGE_INDEX_SAMTOOLS(bam_to_merge) + + // Combine single and merged multiple bam and index files, removing num_intervals field + bam_recal = bam_applybqsr_single + .mix(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai) + .map { meta, bam, bai -> [meta - meta.subMap('num_intervals'), bam, bai] } + + // FOR CRAMs + + // Gather the recalibrated cram files + cram_to_merge = GATK4_APPLYBQSR.out.cram.map { meta, cram_ -> [groupKey(meta, meta.num_intervals), cram_] }.groupTuple() + + // Merge and index the recalibrated cram files + CRAM_MERGE_INDEX_SAMTOOLS( + cram_to_merge, + fasta, + fasta_fai, + ) + + // Remove no longer necessary field: num_intervals + cram_recal = CRAM_MERGE_INDEX_SAMTOOLS.out.cram_crai.map { meta, cram_, crai -> [meta - meta.subMap('num_intervals'), cram_, crai] } + + // 
Gather versions of all tools used + versions = versions.mix(BAM_MERGE_INDEX_SAMTOOLS.out.versions) + versions = versions.mix(CRAM_MERGE_INDEX_SAMTOOLS.out.versions) + versions = versions.mix(GATK4_APPLYBQSR.out.versions) + + emit: + bam = bam_recal // channel: [ meta, bam, bai ] + cram = cram_recal // channel: [ meta, cram, crai ] + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_applybqsr_spark/main.nf b/subworkflows/local/bam_applybqsr_spark/main.nf new file mode 100644 index 0000000000..12358902a2 --- /dev/null +++ b/subworkflows/local/bam_applybqsr_spark/main.nf @@ -0,0 +1,88 @@ +// +// RECALIBRATE SPARK +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { BAM_MERGE_INDEX_SAMTOOLS } from '../bam_merge_index_samtools' +include { CRAM_MERGE_INDEX_SAMTOOLS } from '../cram_merge_index_samtools' +include { GATK4SPARK_APPLYBQSR } from '../../../modules/nf-core/gatk4spark/applybqsr' + +workflow BAM_APPLYBQSR_SPARK { + take: + cram // channel: [mandatory] [ meta, cram, crai, recal ] + dict // channel: [mandatory] [ meta, dict ] + fasta // channel: [mandatory] [ meta, fasta ] + fasta_fai // channel: [mandatory] [ meta, fasta_fai ] + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + + main: + versions = channel.empty() + bam_applybqsr_single = channel.empty() + bam_to_merge = channel.empty() + + // Combine cram and intervals for spread and gather strategy + // Move num_intervals to meta map + cram_intervals = cram + .combine(intervals) + .map { meta, cram_, crai, recal, intervals_, num_intervals -> [meta + [num_intervals: num_intervals], cram_, crai, recal, intervals_] } + + // RUN APPLYBQSR SPARK + GATK4SPARK_APPLYBQSR( + cram_intervals, + fasta.map { _meta, fasta_ -> [fasta_] }, + fasta_fai.map { _meta, fasta_fai_ -> [fasta_fai_] }, + dict.map { _meta, dict_ -> [dict_] }, + ) + + // FOR BAMs + if 
(params.save_output_as_bam) { + + bam_applybqsr_out = GATK4SPARK_APPLYBQSR.out.bam + .join(GATK4SPARK_APPLYBQSR.out.bai, failOnDuplicate: true, failOnMismatch: true) + .branch { files -> + single: files[0].num_intervals == 1 + multiple: files[0].num_intervals > 1 + } + + bam_applybqsr_single = bam_applybqsr_out.single + + // For multiple intervals, gather and merge the recalibrated cram files + bam_to_merge = bam_applybqsr_out.multiple + .map { meta, bam_, _bai -> [groupKey(meta, meta.num_intervals), bam_] } + .groupTuple() + } + + // Merge and index the recalibrated cram files + BAM_MERGE_INDEX_SAMTOOLS(bam_to_merge) + + // Combine single and merged multiple bam and index files, removing num_intervals field + bam_recal = bam_applybqsr_single + .mix(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai) + .map { meta, bam, bai -> [meta - meta.subMap('num_intervals'), bam, bai] } + + // FOR CRAMs + + // Gather the recalibrated cram files + cram_to_merge = GATK4SPARK_APPLYBQSR.out.cram.map { meta, cram_ -> [groupKey(meta, meta.num_intervals), cram_] }.groupTuple() + + // Merge and index the recalibrated cram files + CRAM_MERGE_INDEX_SAMTOOLS( + cram_to_merge, + fasta, + fasta_fai, + ) + + // Remove no longer necessary field: num_intervals + cram_recal = CRAM_MERGE_INDEX_SAMTOOLS.out.cram_crai.map { meta, cram_, crai -> [meta - meta.subMap('num_intervals'), cram_, crai] } + + // Gather versions of all tools used + versions = versions.mix(BAM_MERGE_INDEX_SAMTOOLS.out.versions) + versions = versions.mix(CRAM_MERGE_INDEX_SAMTOOLS.out.versions) + versions = versions.mix(GATK4SPARK_APPLYBQSR.out.versions) + + emit: + bam = bam_recal // channel: [ meta, bam, bai ] + cram = cram_recal // channel: [ meta, cram, crai ] + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_baserecalibrator/main.nf b/subworkflows/local/bam_baserecalibrator/main.nf new file mode 100644 index 0000000000..9535c6463e --- /dev/null +++ b/subworkflows/local/bam_baserecalibrator/main.nf @@ -0,0 
+1,61 @@ +// +// PREPARE RECALIBRATION +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { GATK4_BASERECALIBRATOR } from '../../../modules/nf-core/gatk4/baserecalibrator/main' +include { GATK4_GATHERBQSRREPORTS } from '../../../modules/nf-core/gatk4/gatherbqsrreports/main' + +workflow BAM_BASERECALIBRATOR { + take: + cram // channel: [mandatory] [ meta, cram_markduplicates, crai ] + dict // channel: [mandatory] [ dict ] + fasta // channel: [mandatory] [ fasta ] + fasta_fai // channel: [mandatory] [ fasta_fai ] + intervals // channel: [mandatory] [ intervals, num_intervals ] (or [ [], 0 ] if no intervals) + known_sites // channel: [optional] [ known_sites ] + known_sites_tbi // channel: [optional] [ known_sites_tbi ] + + main: + versions = Channel.empty() + + // Combine cram and intervals for spread and gather strategy + cram_intervals = cram.combine(intervals) + // Move num_intervals to meta map + .map{ meta, cram_, crai, intervals_, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram_, crai, intervals_ ] } + + // RUN BASERECALIBRATOR + GATK4_BASERECALIBRATOR( + cram_intervals, + fasta, + fasta_fai, + dict, + known_sites.map{files -> [['id' : 'known_sites'], files]}, + known_sites_tbi.map{files -> [['id' : 'known_sites'], files]} + ) + + // Figuring out if there is one or more table(s) from the same sample + table_to_merge = GATK4_BASERECALIBRATOR.out.table.map{ meta, table -> [ groupKey(meta, meta.num_intervals), table ] }.groupTuple().branch{ + // Use meta.num_intervals to asses number of intervals + single: it[0].num_intervals <= 1 + multiple: it[0].num_intervals > 1 + } + + // Only when using intervals + GATK4_GATHERBQSRREPORTS(table_to_merge.multiple) + + // Mix intervals and no_intervals channels together + table_bqsr = GATK4_GATHERBQSRREPORTS.out.table.mix(table_to_merge.single.map{ meta, table -> [ meta, table[0] ] }) + // Remove no longer necessary 
field: num_intervals + .map{ meta, table -> [ meta - meta.subMap('num_intervals'), table ] } + + // Gather versions of all tools used + versions = versions.mix(GATK4_BASERECALIBRATOR.out.versions) + versions = versions.mix(GATK4_GATHERBQSRREPORTS.out.versions) + + emit: + table_bqsr // channel: [ meta, table ] + + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_baserecalibrator_spark/main.nf b/subworkflows/local/bam_baserecalibrator_spark/main.nf new file mode 100644 index 0000000000..98f597ff43 --- /dev/null +++ b/subworkflows/local/bam_baserecalibrator_spark/main.nf @@ -0,0 +1,54 @@ +// +// PREPARE RECALIBRATION SPARK +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { GATK4SPARK_BASERECALIBRATOR } from '../../../modules/nf-core/gatk4spark/baserecalibrator/main' +include { GATK4_GATHERBQSRREPORTS } from '../../../modules/nf-core/gatk4/gatherbqsrreports/main' + +workflow BAM_BASERECALIBRATOR_SPARK { + take: + cram // channel: [mandatory] [ meta, cram_markduplicates, crai ] + dict // channel: [mandatory] [ dict ] + fasta // channel: [mandatory] [ fasta ] + fasta_fai // channel: [mandatory] [ fasta_fai ] + intervals // channel: [mandatory] [ intervals, num_intervals ] (or [ [], 0 ] if no intervals) + known_sites // channel: [optional] [ known_sites ] + known_sites_tbi // channel: [optional] [ known_sites_tbi ] + + main: + versions = Channel.empty() + + // Combine cram and intervals for spread and gather strategy + cram_intervals = cram.combine(intervals) + // Move num_intervals to meta map + .map{ meta, cram_, crai, intervals_, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram_, crai, intervals_ ] } + + // RUN BASERECALIBRATOR SPARK + GATK4SPARK_BASERECALIBRATOR(cram_intervals, fasta.map{ meta, it -> [ it ] }, fasta_fai.map{ meta, it -> [ it ] }, dict.map{ meta, it -> [ it ] }, known_sites, known_sites_tbi) + + // Figuring out 
if there is one or more table(s) from the same sample + table_to_merge = GATK4SPARK_BASERECALIBRATOR.out.table.map{ meta, table -> [ groupKey(meta, meta.num_intervals), table ] }.groupTuple().branch{ + // Use meta.num_intervals to asses number of intervals + single: it[0].num_intervals <= 1 + multiple: it[0].num_intervals > 1 + } + + // Only when using intervals + GATK4_GATHERBQSRREPORTS(table_to_merge.multiple) + + // Mix intervals and no_intervals channels together + table_bqsr = GATK4_GATHERBQSRREPORTS.out.table.mix(table_to_merge.single.map{ meta, table -> [ meta, table[0] ] }) + // Remove no longer necessary field: num_intervals + .map{ meta, table -> [ meta - meta.subMap('num_intervals'), table ] } + + // Gather versions of all tools used + versions = versions.mix(GATK4SPARK_BASERECALIBRATOR.out.versions) + versions = versions.mix(GATK4_GATHERBQSRREPORTS.out.versions) + + emit: + table_bqsr // channel: [ meta, table ] + + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_convert_samtools/main.nf b/subworkflows/local/bam_convert_samtools/main.nf new file mode 100644 index 0000000000..c20001d087 --- /dev/null +++ b/subworkflows/local/bam_convert_samtools/main.nf @@ -0,0 +1,76 @@ +// +// BAM/CRAM to FASTQ conversion, paired end only +// + +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_MAP_MAP } from '../../../modules/nf-core/samtools/view' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_UNMAP_UNMAP } from '../../../modules/nf-core/samtools/view' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_UNMAP_MAP } from '../../../modules/nf-core/samtools/view' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_MAP_UNMAP } from '../../../modules/nf-core/samtools/view' +include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_UNMAP } from '../../../modules/nf-core/samtools/merge' +include { SAMTOOLS_COLLATEFASTQ as COLLATE_FASTQ_UNMAP } from '../../../modules/nf-core/samtools/collatefastq' +include { SAMTOOLS_COLLATEFASTQ as COLLATE_FASTQ_MAP } from 
'../../../modules/nf-core/samtools/collatefastq' +include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq' + +workflow BAM_CONVERT_SAMTOOLS { + take: + input // channel: [meta, alignment (BAM or CRAM), index (optional)] + fasta // optional: reference file if CRAM format and reference not in header + fasta_fai + interleaved // value: true/false + + main: + versions = Channel.empty() + + // Index File if not PROVIDED -> this also requires updates to samtools view possibly URGH + + // MAP - MAP + SAMTOOLS_VIEW_MAP_MAP(input, fasta, [], []) + + // UNMAP - UNMAP + SAMTOOLS_VIEW_UNMAP_UNMAP(input, fasta, [], []) + + // UNMAP - MAP + SAMTOOLS_VIEW_UNMAP_MAP(input, fasta, [], []) + + // MAP - UNMAP + SAMTOOLS_VIEW_MAP_UNMAP(input, fasta, [], []) + + // Merge UNMAP + all_unmapped_bam = SAMTOOLS_VIEW_UNMAP_UNMAP.out.bam + .join(SAMTOOLS_VIEW_UNMAP_MAP.out.bam, failOnDuplicate: true, remainder: true) + .join(SAMTOOLS_VIEW_MAP_UNMAP.out.bam, failOnDuplicate: true, remainder: true) + .map{ meta, unmap_unmap, unmap_map, map_unmap -> [ meta, [ unmap_unmap, unmap_map, map_unmap ] ] } + + SAMTOOLS_MERGE_UNMAP(all_unmapped_bam, fasta, fasta_fai) + + // Collate & convert unmapped + COLLATE_FASTQ_UNMAP(SAMTOOLS_MERGE_UNMAP.out.bam, fasta, interleaved) + + // Collate & convert mapped + COLLATE_FASTQ_MAP(SAMTOOLS_VIEW_MAP_MAP.out.bam, fasta, interleaved) + + // join Mapped & unmapped fastq + + reads_to_concat = COLLATE_FASTQ_MAP.out.fastq + .join(COLLATE_FASTQ_UNMAP.out.fastq, failOnDuplicate: true, failOnMismatch: true) + .map{ meta, mapped_reads, unmapped_reads -> [ meta, [ mapped_reads[0], mapped_reads[1], unmapped_reads[0], unmapped_reads[1] ] ] } + + // Concatenate Mapped_R1 with Unmapped_R1 and Mapped_R2 with Unmapped_R2 + CAT_FASTQ(reads_to_concat) + reads = CAT_FASTQ.out.reads + + // Gather versions of all tools used + versions = versions.mix(CAT_FASTQ.out.versions) + versions = versions.mix(COLLATE_FASTQ_MAP.out.versions) + versions = 
versions.mix(COLLATE_FASTQ_UNMAP.out.versions) + versions = versions.mix(SAMTOOLS_MERGE_UNMAP.out.versions) + versions = versions.mix(SAMTOOLS_VIEW_MAP_MAP.out.versions) + versions = versions.mix(SAMTOOLS_VIEW_MAP_UNMAP.out.versions) + versions = versions.mix(SAMTOOLS_VIEW_UNMAP_MAP.out.versions) + versions = versions.mix(SAMTOOLS_VIEW_UNMAP_UNMAP.out.versions) + + emit: + reads + + versions +} diff --git a/subworkflows/local/bam_joint_calling_germline_gatk/main.nf b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf new file mode 100644 index 0000000000..c432b80d43 --- /dev/null +++ b/subworkflows/local/bam_joint_calling_germline_gatk/main.nf @@ -0,0 +1,160 @@ +// +// JOINT GERMLINE CALLING +// +// Merge samples with genomicsdbimport, perform joint genotyping with genotypeGVCFS +// + +include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort/main' +include { GATK4_APPLYVQSR as GATK4_APPLYVQSR_INDEL } from '../../../modules/nf-core/gatk4/applyvqsr/main' +include { GATK4_APPLYVQSR as GATK4_APPLYVQSR_SNP } from '../../../modules/nf-core/gatk4/applyvqsr/main' +include { GATK4_GENOMICSDBIMPORT } from '../../../modules/nf-core/gatk4/genomicsdbimport/main' +include { GATK4_GENOTYPEGVCFS } from '../../../modules/nf-core/gatk4/genotypegvcfs/main' +include { GATK4_MERGEVCFS as MERGE_GENOTYPEGVCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_VQSR } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_VARIANTRECALIBRATOR as VARIANTRECALIBRATOR_INDEL } from '../../../modules/nf-core/gatk4/variantrecalibrator/main' +include { GATK4_VARIANTRECALIBRATOR as VARIANTRECALIBRATOR_SNP } from '../../../modules/nf-core/gatk4/variantrecalibrator/main' + +workflow BAM_JOINT_CALLING_GERMLINE_GATK { + take: + input // channel: [ meta, [ input ], [ input_index ], intervals ] + fasta // channel: [ fasta ] + fai // channel: [ fasta_fai ] + dict // channel: [ dict ] + dbsnp + dbsnp_tbi + dbsnp_vqsr + 
resource_indels_vcf + resource_indels_tbi + known_indels_vqsr + resource_snps_vcf + resource_snps_tbi + known_snps_vqsr + + main: + versions = Channel.empty() + + // Map input for GenomicsDBImport + // Rename based on num_intervals, group all samples by their interval_name/interval_file and restructure for channel + // Group by [0, 3] to avoid a list of metas and make sure that any intervals + gendb_input = input + .map{ meta, gvcf, tbi, intervals -> [ [ id:'joint_variant_calling', intervals_name:intervals.baseName, num_intervals:meta.num_intervals ], gvcf, tbi, intervals ] } + .groupTuple(by:3) //join on interval file + .map{ meta_list, gvcf, tbi, intervals -> + // meta is now a list of [meta1, meta2] but they are all the same. So take the first element. + [ meta_list[0], gvcf, tbi, intervals, [], [] ] + } + + // Convert all sample vcfs into a genomicsdb workspace using genomicsdbimport + GATK4_GENOMICSDBIMPORT(gendb_input, false, false, false) + + genotype_input = GATK4_GENOMICSDBIMPORT.out.genomicsdb.map{ meta, genomicsdb -> [ meta, genomicsdb, [], [], [] ] } + + // Joint genotyping performed using GenotypeGVCFs + // Sort vcfs called by interval within each VCF + + GATK4_GENOTYPEGVCFS(genotype_input, fasta, fai, dict, dbsnp.map{ it -> [ [:], it ] }, dbsnp_tbi.map{ it -> [ [:], it ] }) + + BCFTOOLS_SORT(GATK4_GENOTYPEGVCFS.out.vcf) + gvcf_to_merge = BCFTOOLS_SORT.out.vcf.map{ meta, vcf -> [ meta.subMap('num_intervals') + [ id:'joint_variant_calling', patient:'all_samples', variantcaller:'haplotypecaller' ], vcf ]}.groupTuple() + + // Merge scatter/gather vcfs & index + // Rework meta for variantscalled.csv and annotation tools + MERGE_GENOTYPEGVCFS(gvcf_to_merge, dict) + + vqsr_input = MERGE_GENOTYPEGVCFS.out.vcf.join(MERGE_GENOTYPEGVCFS.out.tbi, failOnDuplicate: true) + indels_resource_label = known_indels_vqsr.mix(dbsnp_vqsr).collect() + snps_resource_label = known_snps_vqsr.mix(dbsnp_vqsr).collect() + + // Recalibrate INDELs and SNPs separately + 
VARIANTRECALIBRATOR_INDEL( + vqsr_input, + resource_indels_vcf, + resource_indels_tbi, + indels_resource_label, + fasta.map{ meta, fasta_ -> [ fasta_ ] }, + fai.map{ meta, fai_ -> [ fai_ ] }, + dict.map{ meta, dict_ -> [ dict_ ] }) + + VARIANTRECALIBRATOR_SNP( + vqsr_input, + resource_snps_vcf, + resource_snps_tbi, + snps_resource_label, + fasta.map{ meta, fasta_ -> [ fasta_ ] }, + fai.map{ meta, fai_ -> [ fai_ ] }, + dict.map{ meta, dict_ -> [ dict_ ] }) + + //Prepare SNPs and INDELs for ApplyVQSR + // Step 1. : ApplyVQSR to SNPs + // Step 2. : Use ApplyVQSR_SNP output and run ApplyVQSR_INDEL. This avoids duplicate entries in the vcf as described here: https://hpc.nih.gov/training/gatk_tutorial/vqsr.html + + // Join results of variant recalibration into a single channel tuple + // Rework meta for variantscalled.csv and annotation tools + vqsr_input_snp = vqsr_input.join(VARIANTRECALIBRATOR_SNP.out.recal, failOnDuplicate: true) + .join(VARIANTRECALIBRATOR_SNP.out.idx, failOnDuplicate: true) + .join(VARIANTRECALIBRATOR_SNP.out.tranches, failOnDuplicate: true) + .map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] } + + GATK4_APPLYVQSR_SNP( + vqsr_input_snp, + fasta.map{ meta, fasta_ -> [ fasta_ ] }, + fai.map{ meta, fai_ -> [ fai_ ] }, + dict.map{ meta, dict_ -> [ dict_ ] }) + + // Join results of ApplyVQSR_SNP and use as input for Indels to avoid duplicate entries in the result + // Rework meta for variantscalled.csv and annotation tools + vqsr_input_indel = GATK4_APPLYVQSR_SNP.out.vcf.join(GATK4_APPLYVQSR_SNP.out.tbi).map{ meta, vcf, tbi -> [ meta + [ id:'joint_variant_calling' ], vcf, tbi ]} + .join(VARIANTRECALIBRATOR_INDEL.out.recal, failOnDuplicate: true) + .join(VARIANTRECALIBRATOR_INDEL.out.idx, failOnDuplicate: true) + .join(VARIANTRECALIBRATOR_INDEL.out.tranches, failOnDuplicate: true) + .map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ 
id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] } + + GATK4_APPLYVQSR_INDEL( + vqsr_input_indel, + fasta.map{ meta, fasta_ -> [ fasta_ ] }, + fai.map{ meta, fai_ -> [ fai_ ] }, + dict.map{ meta, dict_ -> [ dict_ ] }) + + + // The following is an ugly monster to achieve the following: + // When MERGE_GENOTYPEGVCFS and GATK4_APPLYVQSR are run, then use output from APPLYVQSR + // When MERGE_GENOTYPEGVCFS and NOT GATK4_APPLYVQSR , then use the output from MERGE_GENOTYPEGVCFS + + merge_vcf_for_join = MERGE_GENOTYPEGVCFS.out.vcf.map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]} + merge_tbi_for_join = MERGE_GENOTYPEGVCFS.out.tbi.map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]} + + // Remap for both to have the same key, if ApplyBQSR is not run, the channel is empty --> populate with empty elements + vqsr_vcf_for_join = GATK4_APPLYVQSR_INDEL.out.vcf.ifEmpty([[:], []]).map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]} + vqsr_tbi_for_join = GATK4_APPLYVQSR_INDEL.out.tbi.ifEmpty([[:], []]).map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]} + + // Join on metamap + // If both --> meta, vcf_merged, vcf_bqsr + // If not VQSR --> meta, vcf_merged, [] + // if the second is empty, use the first + genotype_vcf = merge_vcf_for_join.join(vqsr_vcf_for_join, remainder: true).map{ + meta, joint_vcf, recal_vcf -> + + def vcf_out = recal_vcf ?: joint_vcf + + [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"haplotypecaller"], vcf_out] + } + + genotype_index = merge_tbi_for_join.join(vqsr_tbi_for_join, remainder: true).map{ + meta, joint_tbi, recal_tbi -> + + def tbi_out = recal_tbi ?: joint_tbi + + [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"haplotypecaller"], tbi_out] + } + + versions = versions.mix(GATK4_GENOMICSDBIMPORT.out.versions) + versions = versions.mix(GATK4_GENOTYPEGVCFS.out.versions) + versions = versions.mix(VARIANTRECALIBRATOR_SNP.out.versions) + versions = 
versions.mix(GATK4_APPLYVQSR_SNP.out.versions) + + emit: + genotype_index // channel: [ val(meta), [ tbi ] ] + genotype_vcf // channel: [ val(meta), [ vcf ] ] + + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/joint_germline_variant_calling/meta.yml b/subworkflows/local/bam_joint_calling_germline_gatk/meta.yml similarity index 100% rename from subworkflows/nf-core/joint_germline_variant_calling/meta.yml rename to subworkflows/local/bam_joint_calling_germline_gatk/meta.yml diff --git a/subworkflows/local/bam_joint_calling_germline_sentieon/main.nf b/subworkflows/local/bam_joint_calling_germline_sentieon/main.nf new file mode 100644 index 0000000000..b496cfe998 --- /dev/null +++ b/subworkflows/local/bam_joint_calling_germline_sentieon/main.nf @@ -0,0 +1,149 @@ +// +// JOINT GERMLINE CALLING +// +// Merge samples perform joint genotyping with SENTIEON_GVCFTYPER +// + +include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort/main' +include { GATK4_MERGEVCFS as MERGE_GENOTYPEGVCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { SENTIEON_APPLYVARCAL as SENTIEON_APPLYVARCAL_INDEL } from '../../../modules/nf-core/sentieon/applyvarcal/main' +include { SENTIEON_APPLYVARCAL as SENTIEON_APPLYVARCAL_SNP } from '../../../modules/nf-core/sentieon/applyvarcal/main' +include { SENTIEON_GVCFTYPER } from '../../../modules/nf-core/sentieon/gvcftyper/main' +include { SENTIEON_VARCAL as SENTIEON_VARCAL_INDEL } from '../../../modules/nf-core/sentieon/varcal/main' +include { SENTIEON_VARCAL as SENTIEON_VARCAL_SNP } from '../../../modules/nf-core/sentieon/varcal/main' + +workflow BAM_JOINT_CALLING_GERMLINE_SENTIEON { + take: + input // channel: [ meta, [ input ], [ input_index ], intervals ] + fasta // channel: [ fasta ] + fai // channel: [ fasta_fai ] + dict // channel: [ dict ] + dbsnp + dbsnp_tbi + dbsnp_vqsr + resource_indels_vcf + resource_indels_tbi + known_indels_vqsr + resource_snps_vcf + resource_snps_tbi + 
known_snps_vqsr + variant_caller + + main: + versions = Channel.empty() + + sentieon_input = input + .map{ meta, gvcf, tbi, intervals -> [ [ id:'joint_variant_calling', intervals_name:intervals.baseName, num_intervals:meta.num_intervals ], gvcf, tbi, intervals ] } + .groupTuple(by:[0, 3]) + + SENTIEON_GVCFTYPER( + sentieon_input, + fasta, + fai, + dbsnp.map{ file -> [[id:'dbsnp'], file] }, + dbsnp_tbi.map{ file -> [[id:'dbsnp'], file] }) + + BCFTOOLS_SORT(SENTIEON_GVCFTYPER.out.vcf_gz) + + gvcf_to_merge = BCFTOOLS_SORT.out.vcf.map{ meta, vcf -> [ meta.subMap('num_intervals') + [ id:'joint_variant_calling', patient:'all_samples', variantcaller:variant_caller ], vcf ]}.groupTuple() + + // Merge scatter/gather vcfs & index + // Rework meta for variantscalled.csv and annotation tools + MERGE_GENOTYPEGVCFS(gvcf_to_merge, dict) + + merged_vcf = MERGE_GENOTYPEGVCFS.out.vcf.map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]} + merged_tbi = MERGE_GENOTYPEGVCFS.out.tbi.map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]} + + if (variant_caller == 'sentieon_dnascope') { + // As advised by Don Freed (Sentieon), VQSR is skipped for DnaScope + genotype_vcf = merged_vcf.map{ + meta, vcf -> [ meta + [ patient:"all_samples", variantcaller:'sentieon_dnascope'], vcf ] + } + genotype_index = merged_tbi.map{ + meta, tbi -> [ meta + [ patient:"all_samples", variantcaller:'sentieon_dnascope'], tbi ] + } + } else { + vqsr_input = MERGE_GENOTYPEGVCFS.out.vcf.join(MERGE_GENOTYPEGVCFS.out.tbi, failOnDuplicate: true) + indels_resource_label = known_indels_vqsr.mix(dbsnp_vqsr).collect() + snps_resource_label = known_snps_vqsr.mix(dbsnp_vqsr).collect() + + // Recalibrate INDELs and SNPs separately + SENTIEON_VARCAL_INDEL( + vqsr_input, + resource_indels_vcf, + resource_indels_tbi, + indels_resource_label, + fasta.map{meta, it -> [ it ]}, + fai.map{meta, it -> [ it ]}) + + SENTIEON_VARCAL_SNP( + vqsr_input, + resource_snps_vcf, + resource_snps_tbi, + snps_resource_label, + fasta.map{meta, 
it -> [ it ]}, + fai.map{meta, it -> [ it ]}) + + //Prepare SNPs and INDELs for Sentieon's applyvarcal + // Step 1. : applyvarcal to SNPs + // Step 2. : Use SENTIEON_APPLYVARCAL_SNP output and run ApplyVQSR_INDEL. This avoids duplicate entries in the vcf as described here: https://hpc.nih.gov/training/gatk_tutorial/vqsr.html + + // Join results of variant recalibration into a single channel tuple + // Rework meta for variantscalled.csv and annotation tools + vqsr_input_snp = vqsr_input.join(SENTIEON_VARCAL_SNP.out.recal, failOnDuplicate: true) + .join(SENTIEON_VARCAL_SNP.out.idx, failOnDuplicate: true) + .join(SENTIEON_VARCAL_SNP.out.tranches, failOnDuplicate: true) + .map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] } + + SENTIEON_APPLYVARCAL_SNP(vqsr_input_snp, fasta, fai) + + // Join results of SENTIEON_APPLYVARCAL_SNP and use as input for SENTIEON_APPLYVARCAL_INDEL to avoid duplicate entries in the result + // Rework meta for variantscalled.csv and annotation tools + vqsr_input_indel = SENTIEON_APPLYVARCAL_SNP.out.vcf.join(SENTIEON_APPLYVARCAL_SNP.out.tbi).map{ meta, vcf, tbi -> [ meta + [ id:'joint_variant_calling' ], vcf, tbi ]} + .join(SENTIEON_VARCAL_INDEL.out.recal, failOnDuplicate: true) + .join(SENTIEON_VARCAL_INDEL.out.idx, failOnDuplicate: true) + .join(SENTIEON_VARCAL_INDEL.out.tranches, failOnDuplicate: true) + .map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] } + + SENTIEON_APPLYVARCAL_INDEL(vqsr_input_indel, fasta, fai) + + // The following is an ugly monster to achieve the following: + // When MERGE_GENOTYPEGVCFS and SENTIEON_APPLYVARCAL are run, then use output from SENTIEON_APPLYVARCAL + // When MERGE_GENOTYPEGVCFS and NOT SENTIEON_APPLYVARCAL, then use the output from MERGE_GENOTYPEGVCFS + + // Remap for both to have the same key, if ApplyBQSR is not run, the channel is empty 
--> populate with empty elements + vqsr_vcf_for_join = SENTIEON_APPLYVARCAL_INDEL.out.vcf.ifEmpty([[:], []]).map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]} + vqsr_tbi_for_join = SENTIEON_APPLYVARCAL_INDEL.out.tbi.ifEmpty([[:], []]).map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]} + + // Join on metamap + // If both --> meta, vcf_merged, vcf_bqsr + // If not VQSR --> meta, vcf_merged, [] + // if the second is empty, use the first + genotype_vcf = merged_vcf.join(vqsr_vcf_for_join, remainder: true).map{ + meta, joint_vcf, recal_vcf -> + + def vcf_out = recal_vcf ?: joint_vcf + + [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"sentieon_haplotyper"], vcf_out] + } + + genotype_index = merged_tbi.join(vqsr_tbi_for_join, remainder: true).map{ + meta, joint_tbi, recal_tbi -> + + def tbi_out = recal_tbi ?: joint_tbi + [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"sentieon_haplotyper"], tbi_out] + } + + versions = versions.mix(SENTIEON_VARCAL_SNP.out.versions) + versions = versions.mix(SENTIEON_VARCAL_INDEL.out.versions) + versions = versions.mix(SENTIEON_APPLYVARCAL_INDEL.out.versions) + } + + versions = versions.mix(SENTIEON_GVCFTYPER.out.versions) + + emit: + genotype_index // channel: [ val(meta), [ tbi ] ] + genotype_vcf // channel: [ val(meta), [ vcf ] ] + + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_markduplicates/main.nf b/subworkflows/local/bam_markduplicates/main.nf new file mode 100644 index 0000000000..6c61793c17 --- /dev/null +++ b/subworkflows/local/bam_markduplicates/main.nf @@ -0,0 +1,43 @@ +// +// MARKDUPLICATES AND QC after mapping +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { CRAM_QC_MOSDEPTH_SAMTOOLS } from '../cram_qc_mosdepth_samtools/main' +include { GATK4_MARKDUPLICATES } from '../../../modules/nf-core/gatk4/markduplicates/main' + +workflow BAM_MARKDUPLICATES 
{ + take: + bam // channel: [mandatory] [ meta, bam ] + fasta // channel: [mandatory] [ fasta ] + fasta_fai // channel: [mandatory] [ fasta_fai ] + intervals_bed_combined // channel: [optional] [ intervals_bed ] + + main: + versions = Channel.empty() + reports = Channel.empty() + + // RUN MARKUPDUPLICATES + GATK4_MARKDUPLICATES(bam, fasta.map{ meta, fasta_ -> [ fasta_ ] }, fasta_fai.map{ meta, fasta_fai_ -> [ fasta_fai_ ] }) + + // Join with the crai file + cram = GATK4_MARKDUPLICATES.out.cram.join(GATK4_MARKDUPLICATES.out.crai, failOnDuplicate: true, failOnMismatch: true) + + // QC on CRAM + CRAM_QC_MOSDEPTH_SAMTOOLS(cram, fasta, intervals_bed_combined) + + // Gather all reports generated + reports = reports.mix(GATK4_MARKDUPLICATES.out.metrics) + reports = reports.mix(CRAM_QC_MOSDEPTH_SAMTOOLS.out.reports) + + // Gather versions of all tools used + versions = versions.mix(GATK4_MARKDUPLICATES.out.versions) + versions = versions.mix(CRAM_QC_MOSDEPTH_SAMTOOLS.out.versions) + + emit: + cram + reports + + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_markduplicates_spark/main.nf b/subworkflows/local/bam_markduplicates_spark/main.nf new file mode 100644 index 0000000000..cffefa2a72 --- /dev/null +++ b/subworkflows/local/bam_markduplicates_spark/main.nf @@ -0,0 +1,54 @@ +// +// MARKDUPLICATES SPARK AND QC after mapping +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { CRAM_QC_MOSDEPTH_SAMTOOLS } from '../cram_qc_mosdepth_samtools/main' +include { GATK4_ESTIMATELIBRARYCOMPLEXITY } from '../../../modules/nf-core/gatk4/estimatelibrarycomplexity/main' +include { GATK4SPARK_MARKDUPLICATES } from '../../../modules/nf-core/gatk4spark/markduplicates/main' +include { SAMTOOLS_INDEX as INDEX_MARKDUPLICATES } from '../../../modules/nf-core/samtools/index/main' + +workflow BAM_MARKDUPLICATES_SPARK { + take: + bam // channel: [mandatory] meta, bam + 
dict // channel: [mandatory] dict + fasta // channel: [mandatory] fasta + fasta_fai // channel: [mandatory] fasta_fai + intervals_bed_combined // channel: [optional] intervals_bed + + main: + versions = Channel.empty() + reports = Channel.empty() + + // RUN MARKUPDUPLICATES SPARK + GATK4SPARK_MARKDUPLICATES(bam, fasta.map{ meta, fasta_ -> [ fasta_ ] }, fasta_fai.map{ meta, fasta_fai_ -> [ fasta_fai_ ] }, dict.map{ meta, dict_ -> [ dict_ ] }) + + // Index cram + INDEX_MARKDUPLICATES(GATK4SPARK_MARKDUPLICATES.out.output) + + // Join with the crai file + cram = GATK4SPARK_MARKDUPLICATES.out.output.join(INDEX_MARKDUPLICATES.out.crai, failOnDuplicate: true, failOnMismatch: true) + + // QC on CRAM + CRAM_QC_MOSDEPTH_SAMTOOLS(cram, fasta, intervals_bed_combined) + + // When running Marduplicates spark, and saving reports + GATK4_ESTIMATELIBRARYCOMPLEXITY(bam, fasta.map{ meta, fasta_ -> [ fasta_ ] }, fasta_fai.map{ meta, fasta_fai_ -> [ fasta_fai_ ] }, dict.map{ meta, dict_ -> [ dict_ ] }) + + // Gather all reports generated + reports = reports.mix(GATK4_ESTIMATELIBRARYCOMPLEXITY.out.metrics) + reports = reports.mix(CRAM_QC_MOSDEPTH_SAMTOOLS.out.reports) + + // Gather versions of all tools used + versions = versions.mix(GATK4_ESTIMATELIBRARYCOMPLEXITY.out.versions) + versions = versions.mix(GATK4SPARK_MARKDUPLICATES.out.versions) + versions = versions.mix(INDEX_MARKDUPLICATES.out.versions) + versions = versions.mix(CRAM_QC_MOSDEPTH_SAMTOOLS.out.versions) + + emit: + cram + reports + + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_merge_index_samtools/main.nf b/subworkflows/local/bam_merge_index_samtools/main.nf new file mode 100644 index 0000000000..d8b1657a00 --- /dev/null +++ b/subworkflows/local/bam_merge_index_samtools/main.nf @@ -0,0 +1,45 @@ +// +// MERGE INDEX BAM +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { SAMTOOLS_INDEX as 
INDEX_MERGE_BAM } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_MERGE as MERGE_BAM } from '../../../modules/nf-core/samtools/merge/main' + +workflow BAM_MERGE_INDEX_SAMTOOLS { + take: + bam // channel: [mandatory] meta, bam + + main: + versions = Channel.empty() + + // Figuring out if there is one or more bam(s) from the same sample + bam_to_merge = bam.branch{ meta, bam_ -> + // bam is a list, so use bam.size() to asses number of intervals + single: bam_.size() <= 1 + return [ meta, bam_[0] ] + multiple: bam_.size() > 1 + } + + // Only when using intervals + MERGE_BAM(bam_to_merge.multiple, [ [ id:'null' ], []], [ [ id:'null' ], []]) + + // Mix intervals and no_intervals channels together + bam_all = MERGE_BAM.out.bam.mix(bam_to_merge.single) + + // Index bam + INDEX_MERGE_BAM(bam_all) + + // Join with the bai file + bam_bai = bam_all.join(INDEX_MERGE_BAM.out.bai, failOnDuplicate: true, failOnMismatch: true) + + // Gather versions of all tools used + versions = versions.mix(INDEX_MERGE_BAM.out.versions) + versions = versions.mix(MERGE_BAM.out.versions) + + emit: + bam_bai + + versions +} diff --git a/subworkflows/local/bam_sentieon_dedup/main.nf b/subworkflows/local/bam_sentieon_dedup/main.nf new file mode 100644 index 0000000000..010e8db0be --- /dev/null +++ b/subworkflows/local/bam_sentieon_dedup/main.nf @@ -0,0 +1,45 @@ +// +// SENTIEON DEDUP + +include { CRAM_QC_MOSDEPTH_SAMTOOLS } from '../cram_qc_mosdepth_samtools/main' +include { SENTIEON_DEDUP } from '../../../modules/nf-core/sentieon/dedup/main' + +workflow BAM_SENTIEON_DEDUP { + take: + bam // channel: [mandatory] [ meta, bam ] // Although the channel is named "bam", it may contain cram-files. 
+ bai + fasta // channel: [mandatory] [ fasta ] + fasta_fai // channel: [mandatory] [ fasta_fai ] + intervals_bed_combined // channel: [optional] [ intervals_bed ] + + main: + versions = Channel.empty() + reports = Channel.empty() + + bam = bam.map{ meta, bam_ -> [ meta - meta.subMap('data_type'), bam_ ] } + bai = bai.map{ meta, bai_ -> [ meta - meta.subMap('data_type'), bai_ ] } + bam_bai = bam.join(bai, failOnMismatch:true, failOnDuplicate:true) + SENTIEON_DEDUP(bam_bai, fasta, fasta_fai) + + // Join with the crai file + cram = SENTIEON_DEDUP.out.cram.join(SENTIEON_DEDUP.out.crai, failOnDuplicate: true, failOnMismatch: true) + + // QC on CRAM + CRAM_QC_MOSDEPTH_SAMTOOLS(cram, fasta, intervals_bed_combined) + + // Gather all reports generated + reports = reports.mix(SENTIEON_DEDUP.out.metrics) + reports = reports.mix(SENTIEON_DEDUP.out.metrics_multiqc_tsv) + reports = reports.mix(SENTIEON_DEDUP.out.score) + reports = reports.mix(CRAM_QC_MOSDEPTH_SAMTOOLS.out.reports) + + // Gather versions of all tools used + versions = versions.mix(SENTIEON_DEDUP.out.versions) + versions = versions.mix(CRAM_QC_MOSDEPTH_SAMTOOLS.out.versions) + + emit: + cram + reports + + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_variant_calling_cnvkit/main.nf b/subworkflows/local/bam_variant_calling_cnvkit/main.nf new file mode 100644 index 0000000000..20d0fe3fd1 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_cnvkit/main.nf @@ -0,0 +1,46 @@ +// +// CNVKIT calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { CNVKIT_BATCH } from '../../../modules/nf-core/cnvkit/batch/main' +include { CNVKIT_CALL } from '../../../modules/nf-core/cnvkit/call/main' +include { CNVKIT_EXPORT } from '../../../modules/nf-core/cnvkit/export/main' +include { CNVKIT_GENEMETRICS } from '../../../modules/nf-core/cnvkit/genemetrics/main' + +workflow BAM_VARIANT_CALLING_CNVKIT { 
+ take: + cram // channel: [mandatory] meta, cram + fasta // channel: [mandatory] meta, fasta + fasta_fai // channel: [optional] meta, fasta_fai + targets // channel: [mandatory] meta, bed + reference // channel: [optional] meta, cnn + + main: + versions = Channel.empty() + generate_pon = false + + CNVKIT_BATCH(cram, fasta, fasta_fai, targets, reference, generate_pon) + + // right now we do not use an input VCF to improve the calling of B alleles + // based on SNV frequencies from the VCF file + // in the future we might consider to add this, by connecting the emission from + // SNV variant calling modules + CNVKIT_CALL(CNVKIT_BATCH.out.cns.map{ meta, cns -> [meta, cns[2], []]}) + + // export to VCF for compatibility with other tools + CNVKIT_EXPORT(CNVKIT_CALL.out.cns) + + ch_genemetrics = CNVKIT_BATCH.out.cnr.join(CNVKIT_BATCH.out.cns).map{ meta, cnr, cns -> [meta, cnr, cns[2]]} + CNVKIT_GENEMETRICS(ch_genemetrics) + + versions = versions.mix(CNVKIT_BATCH.out.versions) + versions = versions.mix(CNVKIT_GENEMETRICS.out.versions) + versions = versions.mix(CNVKIT_CALL.out.versions) + versions = versions.mix(CNVKIT_EXPORT.out.versions) + emit: + cnv_calls_raw = CNVKIT_CALL.out.cns // channel: [ meta, cns ] + cnv_calls_export = CNVKIT_EXPORT.out.output // channel: [ meta, export_format ] + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_variant_calling_deepvariant/main.nf b/subworkflows/local/bam_variant_calling_deepvariant/main.nf new file mode 100644 index 0000000000..b3c779af85 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_deepvariant/main.nf @@ -0,0 +1,82 @@ +// +// DEEPVARIANT germline calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { DEEPVARIANT_RUNDEEPVARIANT } from '../../../modules/nf-core/deepvariant/rundeepvariant/main' +include { GATK4_MERGEVCFS as MERGE_DEEPVARIANT_GVCF } from 
'../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_DEEPVARIANT_VCF } from '../../../modules/nf-core/gatk4/mergevcfs/main' + +// Deepvariant: https://github.com/google/deepvariant/issues/510 +workflow BAM_VARIANT_CALLING_DEEPVARIANT { + take: + cram // channel: [mandatory] [ meta, cram, crai ] + dict // channel: [optional] [ meta, dict ] + fasta // channel: [mandatory] [ fasta ] + fasta_fai // channel: [mandatory] [ fasta_fai ] + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + + main: + versions = Channel.empty() + + // Combine cram and intervals for spread and gather strategy + cram_intervals = cram.combine(intervals) + // Move num_intervals to meta map + .map{ meta, cram_, crai, intervals_, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram_, crai, intervals_ ]} + + DEEPVARIANT_RUNDEEPVARIANT(cram_intervals, fasta, fasta_fai, [ [ id:'null' ], [] ], [ [ id:'null' ], [] ]) + + // Figuring out if there is one or more vcf(s) from the same sample + vcf_out = DEEPVARIANT_RUNDEEPVARIANT.out.vcf.branch{ + // Use meta.num_intervals to asses number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more gvcf(s) from the same sample + gvcf_out = DEEPVARIANT_RUNDEEPVARIANT.out.gvcf.branch{ + // Use meta.num_intervals to asses number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only when using intervals + gvcf_to_merge = gvcf_out.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + vcf_to_merge = vcf_out.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + + MERGE_DEEPVARIANT_GVCF(gvcf_to_merge, dict) + MERGE_DEEPVARIANT_VCF(vcf_to_merge, dict) + + // Figuring out if there is one or more tbi(s) from the same sample + tbi_out = DEEPVARIANT_RUNDEEPVARIANT.out.vcf_index.branch{ + // 
Use meta.num_intervals to asses number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Mix intervals and no_intervals channels together + gvcf = Channel.empty().mix(MERGE_DEEPVARIANT_GVCF.out.vcf, gvcf_out.no_intervals) + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'deepvariant' ], vcf ] } + + // Mix intervals and no_intervals channels together + vcf = Channel.empty().mix(MERGE_DEEPVARIANT_VCF.out.vcf, vcf_out.no_intervals) + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'deepvariant' ], vcf ] } + + tbi = Channel.empty().mix(MERGE_DEEPVARIANT_VCF.out.tbi, tbi_out.no_intervals) + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, tbi -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'deepvariant' ], tbi ] } + + versions = versions.mix(DEEPVARIANT_RUNDEEPVARIANT.out.versions) + versions = versions.mix(MERGE_DEEPVARIANT_GVCF.out.versions) + versions = versions.mix(MERGE_DEEPVARIANT_VCF.out.versions) + + emit: + gvcf + vcf + tbi + + versions +} diff --git a/subworkflows/local/bam_variant_calling_freebayes/main.nf b/subworkflows/local/bam_variant_calling_freebayes/main.nf new file mode 100644 index 0000000000..b176ed7bb3 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_freebayes/main.nf @@ -0,0 +1,81 @@ +// +// FREEBAYES variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort' +include { FREEBAYES } from '../../../modules/nf-core/freebayes' +include { GATK4_MERGEVCFS as MERGE_FREEBAYES } from '../../../modules/nf-core/gatk4/mergevcfs' +include { 
TABIX_TABIX as TABIX_VC_FREEBAYES } from '../../../modules/nf-core/tabix/tabix' +include { TABIX_TABIX as TABIX_VC_FREEBAYES_FILT } from '../../../modules/nf-core/tabix/tabix' +include { VCFLIB_VCFFILTER } from '../../../modules/nf-core/vcflib/vcffilter' + +workflow BAM_VARIANT_CALLING_FREEBAYES { + take: + ch_cram // channel: [mandatory] [ meta, cram1, crai1, cram2, crai2 ] or [ meta, cram, crai, [], [] ] + ch_dict // channel: [mandatory] [ meta, dict ] + ch_fasta // channel: [mandatory] [ meta, fasta ] + ch_fasta_fai // channel: [mandatory] [ meta, fasta_fai ] + ch_intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + + main: + versions = Channel.empty() + + // Combine cram and intervals for spread and gather strategy + cram_intervals = ch_cram.combine(ch_intervals) + // Move num_intervals to meta map and reorganize channel for FREEBAYES module + .map{ meta, cram1, crai1, cram2, crai2, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram1, crai1, cram2, crai2, intervals ]} + + FREEBAYES(cram_intervals, ch_fasta, ch_fasta_fai, [[id:'null'], []], [[id:'null'], []], [[id:'null'], []]) + + BCFTOOLS_SORT(FREEBAYES.out.vcf) + + // Figuring out if there is one or more vcf(s) from the same sample + bcftools_vcf_out = BCFTOOLS_SORT.out.vcf.branch{ + // Use meta.num_intervals to asses number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only when using intervals + vcf_to_merge = bcftools_vcf_out.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + MERGE_FREEBAYES(vcf_to_merge, ch_dict) + + // Only when no_intervals + TABIX_VC_FREEBAYES(bcftools_vcf_out.no_intervals) + + // Mix intervals and no_intervals channels together, including the tabix index + merged_vcf_with_tbi = MERGE_FREEBAYES.out.vcf + .join(MERGE_FREEBAYES.out.tbi, by: [0]) + .map{ meta, vcf, tbi -> [ meta - meta.subMap('num_intervals') + [ 
variantcaller:'freebayes' ], vcf, tbi ] } + + no_intervals_with_tbi = bcftools_vcf_out.no_intervals + .join(TABIX_VC_FREEBAYES.out.tbi, by: [0]) + .map{ meta, vcf, tbi -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'freebayes' ], vcf, tbi ] } + + // Final channel with VCF and its index + ch_vcf = merged_vcf_with_tbi.mix(no_intervals_with_tbi) + + VCFLIB_VCFFILTER(ch_vcf) + + vcf_filtered = VCFLIB_VCFFILTER.out.vcf + + // Index the filtered VCFs + TABIX_VC_FREEBAYES_FILT(vcf_filtered) + + versions = versions.mix(BCFTOOLS_SORT.out.versions) + versions = versions.mix(FREEBAYES.out.versions) + versions = versions.mix(MERGE_FREEBAYES.out.versions) + versions = versions.mix(TABIX_VC_FREEBAYES.out.versions) + versions = versions.mix(TABIX_VC_FREEBAYES_FILT.out.versions) + versions = versions.mix(VCFLIB_VCFFILTER.out.versions) + + emit: + vcf_unfiltered = ch_vcf // channel: [ meta, vcf, tbi ] + + // Use the QUAL filtered vcfs for the next steps + vcf = vcf_filtered // channel: [ meta, vcf ] + tbi = TABIX_VC_FREEBAYES_FILT.out.tbi // channel: [ meta, tbi ] + versions +} diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf new file mode 100644 index 0000000000..feed96a33f --- /dev/null +++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf @@ -0,0 +1,428 @@ +// +// GERMLINE VARIANT CALLING +// + +include { BAM_JOINT_CALLING_GERMLINE_GATK } from '../bam_joint_calling_germline_gatk/main' +include { BAM_JOINT_CALLING_GERMLINE_SENTIEON } from '../bam_joint_calling_germline_sentieon/main' +include { BAM_VARIANT_CALLING_CNVKIT } from '../bam_variant_calling_cnvkit/main' +include { BAM_VARIANT_CALLING_DEEPVARIANT } from '../bam_variant_calling_deepvariant/main' +include { BAM_VARIANT_CALLING_FREEBAYES } from '../bam_variant_calling_freebayes/main' +include { BAM_VARIANT_CALLING_GERMLINE_MANTA } from '../bam_variant_calling_germline_manta/main' +include { 
BAM_VARIANT_CALLING_HAPLOTYPECALLER } from '../bam_variant_calling_haplotypecaller/main' +include { BAM_VARIANT_CALLING_INDEXCOV } from '../bam_variant_calling_indexcov/main' +include { BAM_VARIANT_CALLING_SENTIEON_DNASCOPE } from '../bam_variant_calling_sentieon_dnascope/main' +include { BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER } from '../bam_variant_calling_sentieon_haplotyper/main' +include { BAM_VARIANT_CALLING_MPILEUP } from '../bam_variant_calling_mpileup/main' +include { BAM_VARIANT_CALLING_SINGLE_STRELKA } from '../bam_variant_calling_single_strelka/main' +include { BAM_VARIANT_CALLING_SINGLE_TIDDIT } from '../bam_variant_calling_single_tiddit/main' +include { SENTIEON_DNAMODELAPPLY } from '../../../modules/nf-core/sentieon/dnamodelapply/main' +include { VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main' +include { VCF_VARIANT_FILTERING_GATK as SENTIEON_HAPLOTYPER_VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main' + +workflow BAM_VARIANT_CALLING_GERMLINE_ALL { + take: + tools // Mandatory, list of tools to apply + skip_tools // Mandatory, list of tools to skip + bam // channel: [mandatory] meta, bam + cram // channel: [mandatory] meta, cram + bwa // channel: [mandatory] meta, bwa + cnvkit_reference // channel: [optional] cnvkit reference + dbsnp // channel: [mandatory] meta, dbsnp + dbsnp_tbi // channel: [mandatory] dbsnp_tbi + dbsnp_vqsr + dict // channel: [mandatory] meta, dict + fasta // channel: [mandatory] meta, fasta + fasta_fai // channel: [mandatory] meta, fasta_fai + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped + intervals_bed_gz_tbi_combined // channel: [mandatory] intervals/target regions in one file zipped + intervals_bed_combined_haplotypec // channel: [mandatory] intervals/target regions in one file unzipped, no_intervals.bed if no_intervals + intervals_bed_gz_tbi 
// channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi, num_intervals ] or [ [], [], 0 ] if no intervals + known_indels_vqsr + known_sites_indels + known_sites_indels_tbi + known_sites_snps + known_sites_snps_tbi + known_snps_vqsr + joint_germline // boolean: [mandatory] [default: false] joint calling of germline variants + skip_haplotypecaller_filter // boolean: [mandatory] [default: false] whether to filter haplotypecaller single sample vcfs + sentieon_haplotyper_emit_mode // channel: [mandatory] value channel with string + sentieon_dnascope_emit_mode // channel: [mandatory] value channel with string + sentieon_dnascope_pcr_indel_model // channel: [mandatory] value channel with string + sentieon_dnascope_model // channel: [mandatory] value channel with string + + main: + versions = Channel.empty() + + //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config + gvcf_sentieon_dnascope = Channel.empty() + gvcf_sentieon_haplotyper = Channel.empty() + + out_indexcov = Channel.empty() + vcf_deepvariant = Channel.empty() + vcf_freebayes = Channel.empty() + vcf_haplotypecaller = Channel.empty() + vcf_manta = Channel.empty() + vcf_mpileup = Channel.empty() + vcf_sentieon_dnascope = Channel.empty() + vcf_sentieon_haplotyper = Channel.empty() + vcf_strelka = Channel.empty() + vcf_tiddit = Channel.empty() + tbi_deepvariant = Channel.empty() + tbi_freebayes = Channel.empty() + tbi_haplotypecaller = Channel.empty() + tbi_manta = Channel.empty() + tbi_mpileup = Channel.empty() + tbi_sentieon_dnascope = Channel.empty() + tbi_sentieon_haplotyper = Channel.empty() + tbi_strelka = Channel.empty() + tbi_tiddit = Channel.empty() + + // BCFTOOLS MPILEUP + if (tools && tools.split(',').contains('mpileup')) { + BAM_VARIANT_CALLING_MPILEUP( + cram, + dict, + fasta, + intervals + ) + vcf_mpileup = BAM_VARIANT_CALLING_MPILEUP.out.vcf + tbi_mpileup = BAM_VARIANT_CALLING_MPILEUP.out.tbi + versions = 
versions.mix(BAM_VARIANT_CALLING_MPILEUP.out.versions) + } + + // CNVKIT + if (tools && tools.split(',').contains('cnvkit')) { + BAM_VARIANT_CALLING_CNVKIT( + // Remap channel to match module/subworkflow + cram.map{ meta, cram_, crai -> [ meta, [], cram_ ] }, + fasta, + fasta_fai, + intervals_bed_combined.map{it -> it ? [[id:it[0].baseName], it]: [[id:'no_intervals'], []]}, + params.cnvkit_reference ? cnvkit_reference.map{ it -> [[id:it[0].baseName], it] } : [[:],[]] + ) + versions = versions.mix(BAM_VARIANT_CALLING_CNVKIT.out.versions) + } + + // DEEPVARIANT + if (tools && tools.split(',').contains('deepvariant')) { + BAM_VARIANT_CALLING_DEEPVARIANT( + cram, + dict, + fasta, + fasta_fai, + intervals + ) + + vcf_deepvariant = BAM_VARIANT_CALLING_DEEPVARIANT.out.vcf + tbi_deepvariant = BAM_VARIANT_CALLING_DEEPVARIANT.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_DEEPVARIANT.out.versions) + } + + // FREEBAYES + if (tools && tools.split(',').contains('freebayes')) { + // Input channel is remapped to match input of module/subworkflow + BAM_VARIANT_CALLING_FREEBAYES( + // Remap channel to match module/subworkflow + cram.map{ meta, cram_, crai -> [ meta, cram_, crai, [], [] ] }, + dict, + fasta, + fasta_fai, + intervals + ) + + vcf_freebayes = BAM_VARIANT_CALLING_FREEBAYES.out.vcf + tbi_freebayes = BAM_VARIANT_CALLING_FREEBAYES.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_FREEBAYES.out.versions) + } + + // HAPLOTYPECALLER + if (tools && tools.split(',').contains('haplotypecaller')) { + BAM_VARIANT_CALLING_HAPLOTYPECALLER( + cram, + fasta, + fasta_fai, + dict, + dbsnp.map{it -> [[:], it]}, + dbsnp_tbi.map{it -> [[:], it]}, + intervals) + + vcf_haplotypecaller = BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.vcf + tbi_haplotypecaller = BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.tbi + + versions = versions.mix(BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.versions) + + if (joint_germline) { + BAM_JOINT_CALLING_GERMLINE_GATK( + 
BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.gvcf_tbi_intervals, + fasta, + fasta_fai, + dict, + dbsnp, + dbsnp_tbi, + dbsnp_vqsr, + known_sites_indels, + known_sites_indels_tbi, + known_indels_vqsr, + known_sites_snps, + known_sites_snps_tbi, + known_snps_vqsr) + + vcf_haplotypecaller = BAM_JOINT_CALLING_GERMLINE_GATK.out.genotype_vcf + tbi_haplotypecaller = BAM_JOINT_CALLING_GERMLINE_GATK.out.genotype_index + versions = versions.mix(BAM_JOINT_CALLING_GERMLINE_GATK.out.versions) + } else { + + // If single sample track, check if filtering should be done + if (!skip_haplotypecaller_filter) { + + VCF_VARIANT_FILTERING_GATK( + vcf_haplotypecaller.join(tbi_haplotypecaller, failOnDuplicate: true, failOnMismatch: true), + fasta.map{ meta, fasta_ -> [ fasta_ ] }, + fasta_fai.map{ meta, fasta_fai_ -> [ fasta_fai_ ] }, + dict.map{ meta, dict_ -> [ dict_ ] }, + intervals_bed_combined_haplotypec, + known_sites_indels.concat(known_sites_snps).flatten().unique().collect(), + known_sites_indels_tbi.concat(known_sites_snps_tbi).flatten().unique().collect()) + + vcf_haplotypecaller = VCF_VARIANT_FILTERING_GATK.out.filtered_vcf + tbi_haplotypecaller = VCF_VARIANT_FILTERING_GATK.out.filtered_tbi + + versions = versions.mix(VCF_VARIANT_FILTERING_GATK.out.versions) + } + } + } + + // MANTA + if (tools && tools.split(',').contains('manta')) { + BAM_VARIANT_CALLING_GERMLINE_MANTA ( + cram, + fasta, + fasta_fai, + intervals_bed_gz_tbi_combined + ) + + vcf_manta = BAM_VARIANT_CALLING_GERMLINE_MANTA.out.vcf + tbi_manta = BAM_VARIANT_CALLING_GERMLINE_MANTA.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_GERMLINE_MANTA.out.versions) + } + + // INDEXCOV, for WGS only; tools may be unset, so guard it like every other caller before splitting + if (params.wes==false && tools && tools.split(',').contains('indexcov')) { + BAM_VARIANT_CALLING_INDEXCOV ( + cram, + fasta, + fasta_fai + ) + + out_indexcov = BAM_VARIANT_CALLING_INDEXCOV.out.out_indexcov + versions = versions.mix(BAM_VARIANT_CALLING_INDEXCOV.out.versions) + } + + // SENTIEON DNASCOPE + if (tools && 
tools.split(',').contains('sentieon_dnascope')) { + BAM_VARIANT_CALLING_SENTIEON_DNASCOPE( + cram, + fasta, + fasta_fai, + dict, + dbsnp, + dbsnp_tbi, + dbsnp_vqsr, + intervals, + joint_germline, + sentieon_dnascope_emit_mode, + sentieon_dnascope_pcr_indel_model, + sentieon_dnascope_model) + + versions = versions.mix(BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.versions) + + vcf_sentieon_dnascope = BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.vcf + tbi_sentieon_dnascope = BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.vcf_tbi + gvcf_sentieon_dnascope = BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.gvcf + gvcf_tbi_sentieon_dnascope = BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.gvcf_tbi + + if (joint_germline) { + BAM_JOINT_CALLING_GERMLINE_SENTIEON( + BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.genotype_intervals, + fasta, + fasta_fai, + dict, + dbsnp, + dbsnp_tbi, + dbsnp_vqsr, + known_sites_indels, + known_sites_indels_tbi, + known_indels_vqsr, + known_sites_snps, + known_sites_snps_tbi, + known_snps_vqsr, + 'sentieon_dnascope') + + vcf_sentieon_dnascope = BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.genotype_vcf + tbi_sentieon_dnascope = BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.genotype_index + versions = versions.mix(BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.versions) + } else { + // If single sample track, check if filtering should be done + if (!(skip_tools && skip_tools.split(',').contains('dnascope_filter'))) { + + SENTIEON_DNAMODELAPPLY( + vcf_sentieon_dnascope.join(tbi_sentieon_dnascope, failOnDuplicate: true, failOnMismatch: true), + fasta, + fasta_fai, + sentieon_dnascope_model.map{ model -> [ [ id:model.baseName ], model ] }) + + vcf_sentieon_dnascope = SENTIEON_DNAMODELAPPLY.out.vcf + tbi_sentieon_dnascope = SENTIEON_DNAMODELAPPLY.out.tbi + versions = versions.mix(SENTIEON_DNAMODELAPPLY.out.versions) + + } + + } + } + + // SENTIEON HAPLOTYPER + if (tools && tools.split(',').contains('sentieon_haplotyper')) { + BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER( + cram, + fasta, + 
fasta_fai, + dict, + dbsnp, + dbsnp_tbi, + dbsnp_vqsr, + intervals, + joint_germline, + sentieon_haplotyper_emit_mode) + + versions = versions.mix(BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.versions) + + vcf_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.vcf + tbi_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.vcf_tbi + gvcf_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.gvcf + gvcf_tbi_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.gvcf_tbi + + if (joint_germline) { + BAM_JOINT_CALLING_GERMLINE_SENTIEON( + BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.genotype_intervals, + fasta, + fasta_fai, + dict, + dbsnp, + dbsnp_tbi, + dbsnp_vqsr, + known_sites_indels, + known_sites_indels_tbi, + known_indels_vqsr, + known_sites_snps, + known_sites_snps_tbi, + known_snps_vqsr, + 'sentieon_haplotyper') + + vcf_sentieon_haplotyper = BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.genotype_vcf + tbi_sentieon_haplotyper = BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.genotype_index + versions = versions.mix(BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.versions) + } else { + + // If single sample track, check if filtering should be done + if (!(skip_tools && skip_tools.split(',').contains('haplotyper_filter'))) { + + SENTIEON_HAPLOTYPER_VCF_VARIANT_FILTERING_GATK( + vcf_sentieon_haplotyper.join(tbi_sentieon_haplotyper, failOnDuplicate: true, failOnMismatch: true), + fasta.map{ meta, it -> [ it ] }, + fasta_fai.map{ meta, it -> [ it ] }, + dict.map{ meta, dict_ -> [ dict_ ] }, + intervals_bed_combined_haplotypec, + known_sites_indels.concat(known_sites_snps).flatten().unique().collect(), + known_sites_indels_tbi.concat(known_sites_snps_tbi).flatten().unique().collect()) + + vcf_sentieon_haplotyper = SENTIEON_HAPLOTYPER_VCF_VARIANT_FILTERING_GATK.out.filtered_vcf + tbi_sentieon_haplotyper = SENTIEON_HAPLOTYPER_VCF_VARIANT_FILTERING_GATK.out.filtered_tbi + + versions = 
versions.mix(SENTIEON_HAPLOTYPER_VCF_VARIANT_FILTERING_GATK.out.versions) + } + } + } + + // STRELKA + + if (tools && tools.split(',').contains('strelka')) { + + BAM_VARIANT_CALLING_SINGLE_STRELKA( + cram, + dict, + fasta.map{ meta, fasta_ -> [ fasta_ ] }, + fasta_fai.map{ meta, fasta_fai_ -> [ fasta_fai_ ] }, + intervals_bed_gz_tbi + ) + + vcf_strelka = BAM_VARIANT_CALLING_SINGLE_STRELKA.out.vcf + tbi_strelka = BAM_VARIANT_CALLING_SINGLE_STRELKA.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_SINGLE_STRELKA.out.versions) + } + + // TIDDIT + if (tools && tools.split(',').contains('tiddit')) { + BAM_VARIANT_CALLING_SINGLE_TIDDIT( + cram, + // Remap channel to match module/subworkflow + fasta, + bwa + ) + + vcf_tiddit = BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.vcf + tbi_tiddit = BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.versions) + } + + vcf_all = Channel.empty().mix( + vcf_deepvariant, + vcf_freebayes, + vcf_sentieon_dnascope, + vcf_haplotypecaller, + vcf_manta, + vcf_mpileup, + vcf_sentieon_haplotyper, + vcf_strelka, + vcf_tiddit + ) + + tbi_all = Channel.empty().mix( + tbi_deepvariant, + tbi_freebayes, + tbi_sentieon_dnascope, + tbi_haplotypecaller, + tbi_manta, + tbi_mpileup, + tbi_sentieon_haplotyper, + tbi_strelka, + tbi_tiddit + ) + + emit: + gvcf_sentieon_dnascope + gvcf_sentieon_haplotyper + out_indexcov + vcf_all + vcf_deepvariant + vcf_freebayes + vcf_haplotypecaller + vcf_manta + vcf_mpileup + vcf_strelka + vcf_sentieon_dnascope + vcf_sentieon_haplotyper + vcf_tiddit + tbi_all + tbi_deepvariant + tbi_freebayes + tbi_haplotypecaller + tbi_manta + tbi_mpileup + tbi_sentieon_dnascope + tbi_sentieon_haplotyper + tbi_strelka + tbi_tiddit + + versions +} diff --git a/subworkflows/local/bam_variant_calling_germline_manta/main.nf b/subworkflows/local/bam_variant_calling_germline_manta/main.nf new file mode 100644 index 0000000000..be68cc9114 --- /dev/null +++ 
b/subworkflows/local/bam_variant_calling_germline_manta/main.nf @@ -0,0 +1,47 @@ +// +// Manta germline variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { MANTA_GERMLINE } from '../../../modules/nf-core/manta/germline/main' + +// Seems to be the consensus on upstream modules implementation too +workflow BAM_VARIANT_CALLING_GERMLINE_MANTA { + take: + cram // channel: [mandatory] [ meta, cram, crai ] + fasta // channel: [mandatory] [ meta, fasta ] + fasta_fai // channel: [mandatory] [ meta, fasta_fai ] + intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi] or [ [], []] if no intervals; intervals file contains all intervals + + main: + versions = Channel.empty() + + // Combine cram and intervals, account for 0 intervals + cram_intervals = cram.combine(intervals).map{ it -> + def bed_gz = it.size() > 3 ? it[3] : [] + def bed_tbi = it.size() > 3 ? it[4] : [] + + [it[0], it[1], it[2], bed_gz, bed_tbi] + } + + MANTA_GERMLINE(cram_intervals, fasta, fasta_fai, []) + + small_indels_vcf = MANTA_GERMLINE.out.candidate_small_indels_vcf + sv_vcf = MANTA_GERMLINE.out.candidate_sv_vcf + diploid_sv_vcf = MANTA_GERMLINE.out.diploid_sv_vcf + diploid_sv_vcf_tbi = MANTA_GERMLINE.out.diploid_sv_vcf_tbi + + // Only diploid SV should get annotated + // add variantcaller to meta map + vcf = diploid_sv_vcf.map{ meta, vcf -> [ meta + [ variantcaller:'manta' ], vcf ] } + tbi = diploid_sv_vcf_tbi.map{ meta, tbi -> [ meta + [ variantcaller:'manta' ], tbi ] } + + versions = versions.mix(MANTA_GERMLINE.out.versions) + + emit: + vcf + tbi + + versions +} diff --git a/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf new file mode 100644 index 0000000000..fe71721fd3 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf @@ -0,0 +1,108 @@ +// +// GATK4 
HAPLOTYPECALLER germline variant calling: +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { BAM_MERGE_INDEX_SAMTOOLS } from '../bam_merge_index_samtools/main' +include { GATK4_HAPLOTYPECALLER } from '../../../modules/nf-core/gatk4/haplotypecaller/main' +include { GATK4_MERGEVCFS as MERGE_HAPLOTYPECALLER } from '../../../modules/nf-core/gatk4/mergevcfs/main' + +workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER { + take: + cram // channel: [mandatory] [ meta, cram, crai ] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + dict // channel: [mandatory] + dbsnp // channel: [optional] + dbsnp_tbi // channel: [optional] + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + + main: + versions = Channel.empty() + + vcf = Channel.empty() + realigned_bam = Channel.empty() + + // Combine cram and intervals for spread and gather strategy + cram_intervals = cram.combine(intervals) + // Move num_intervals to meta map + // Add interval_name to allow correct merging with interval files + .map{ meta, cram_, crai, intervals_, num_intervals -> [ meta + [ interval_name:intervals_.baseName, num_intervals:num_intervals, variantcaller:'haplotypecaller' ], cram_, crai, intervals_, [] ] } + + GATK4_HAPLOTYPECALLER( + cram_intervals, + fasta, + fasta_fai, + dict, + dbsnp, + dbsnp_tbi) + + // For joint genotyping + gvcf_tbi_intervals = GATK4_HAPLOTYPECALLER.out.vcf + .join(GATK4_HAPLOTYPECALLER.out.tbi, failOnMismatch: true) + .join(cram_intervals, failOnMismatch: true) + .map{ meta, gvcf, tbi, cram_, crai, intervals_, dragstr_model -> [ meta, gvcf, tbi, intervals_ ] } + + // Figuring out if there is one or more vcf(s) from the same sample + haplotypecaller_vcf = GATK4_HAPLOTYPECALLER.out.vcf.map{ + meta, vcf_ -> [ meta - meta.subMap('interval_name'), vcf_] + } + .branch{ + // Use meta.num_intervals to assess number of 
intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more tbi(s) from the same sample + haplotypecaller_tbi = GATK4_HAPLOTYPECALLER.out.tbi.map{ + meta, tbi -> [ meta - meta.subMap('interval_name'), tbi] + }.branch{ + // Use meta.num_intervals to asses number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more bam(s) from the same sample + haplotypecaller_bam = GATK4_HAPLOTYPECALLER.out.bam.map{ + meta, bam -> [ meta - meta.subMap('interval_name'), bam] + }.branch{ + // Use meta.num_intervals to asses number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only when using intervals + MERGE_HAPLOTYPECALLER(haplotypecaller_vcf.intervals.map{ meta, vcf_ -> [ groupKey(meta, meta.num_intervals), vcf_ ] }.groupTuple(), dict) + + haplotypecaller_vcf = Channel.empty().mix( + MERGE_HAPLOTYPECALLER.out.vcf, + haplotypecaller_vcf.no_intervals) + + haplotypecaller_tbi = Channel.empty().mix( + MERGE_HAPLOTYPECALLER.out.tbi, + haplotypecaller_tbi.no_intervals) + + // BAM output + BAM_MERGE_INDEX_SAMTOOLS(haplotypecaller_bam.intervals + .map{ meta, bam -> [ groupKey(meta, meta.num_intervals), bam ] } + .groupTuple() + .mix(haplotypecaller_bam.no_intervals)) + + realigned_bam = BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai + + versions = versions.mix(GATK4_HAPLOTYPECALLER.out.versions) + versions = versions.mix(MERGE_HAPLOTYPECALLER.out.versions) + + // Remove no longer necessary field: num_intervals + vcf = haplotypecaller_vcf.map{ meta, vcf_ -> [ meta - meta.subMap('num_intervals'), vcf_ ] } + tbi = haplotypecaller_tbi.map{ meta, tbi -> [ meta - meta.subMap('num_intervals'), tbi ] } + + emit: + gvcf_tbi_intervals // For joint genotyping + realigned_bam // Optional + vcf // vcf + tbi // tbi + + versions +} diff --git a/subworkflows/local/bam_variant_calling_indexcov/main.nf 
b/subworkflows/local/bam_variant_calling_indexcov/main.nf new file mode 100644 index 0000000000..d1bc9f39a9 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_indexcov/main.nf @@ -0,0 +1,44 @@ +// +// Indexcov calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { SAMTOOLS_REINDEX_BAM } from '../../../modules/local/samtools/reindex_bam/main' +include { GOLEFT_INDEXCOV } from '../../../modules/nf-core/goleft/indexcov/main' + +// Seems to be the consensus on upstream modules implementation too +workflow BAM_VARIANT_CALLING_INDEXCOV { + take: + cram // channel: [mandatory] [ meta, cram, crai ] + fasta // channel: [mandatory] [ meta, fasta ] + fasta_fai // channel: [mandatory] [ meta, fasta_fai ] + + main: + versions = Channel.empty() + + // generate a cleaner bam index without duplicate, supplementary, etc. (Small workload because the bam itself is not re-generated) + reindex_ch = SAMTOOLS_REINDEX_BAM( + cram, + fasta, + fasta_fai + ) + + versions = versions.mix(reindex_ch.versions) + + // create [ [id:directory], bams, bais ] + indexcov_input_ch = reindex_ch.output.map{[[id:"indexcov"], it[1], it[2]]}.groupTuple() + + goleft_ch = GOLEFT_INDEXCOV( + indexcov_input_ch, + fasta_fai + ) + + versions = versions.mix(goleft_ch.versions) + + + emit: + + out_indexcov = goleft_ch.output + versions +} diff --git a/subworkflows/local/bam_variant_calling_mpileup/main.nf b/subworkflows/local/bam_variant_calling_mpileup/main.nf new file mode 100644 index 0000000000..26e7f4df2e --- /dev/null +++ b/subworkflows/local/bam_variant_calling_mpileup/main.nf @@ -0,0 +1,81 @@ +// +// MPILEUP variant calling: BCFTOOLS for variantcalling, SAMTools for controlfreec input +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { BCFTOOLS_MPILEUP } from '../../../modules/nf-core/bcftools/mpileup' 
+include { CAT_CAT as CAT_MPILEUP } from '../../../modules/nf-core/cat/cat' +include { GATK4_MERGEVCFS as MERGE_BCFTOOLS_MPILEUP } from '../../../modules/nf-core/gatk4/mergevcfs' +include { SAMTOOLS_MPILEUP } from '../../../modules/nf-core/samtools/mpileup' + +workflow BAM_VARIANT_CALLING_MPILEUP { + take: + cram // channel: [mandatory] [ meta, cram, crai ] + dict // channel: [mandatory] [ meta, dict ] + fasta // channel: [mandatory] [ fasta ] + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + + main: + versions = Channel.empty() + + // Combine cram and intervals for spread and gather strategy + cram_intervals = cram + .combine(intervals) + .map { meta, cram_, _crai, intervals_, num_intervals -> [meta + [num_intervals: num_intervals], cram_, intervals_] } + + // Run, if --tools mpileup + keep_bcftools_mpileup = false + BCFTOOLS_MPILEUP(cram_intervals, fasta, keep_bcftools_mpileup) + + //Only run, if --tools ControlFreec + SAMTOOLS_MPILEUP(cram_intervals, fasta) + + // Figuring out if there is one or more vcf(s) from the same sample + vcf_mpileup = BCFTOOLS_MPILEUP.out.vcf.branch { + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more tbi(s) from the same sample + tbi_mpileup = BCFTOOLS_MPILEUP.out.tbi.branch { + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more mpileup(s) from the same sample + mpileup_samtools = SAMTOOLS_MPILEUP.out.mpileup.branch { + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Merge mpileup and natural order sort them + mpileup_to_merge = mpileup_samtools.intervals.map { meta, pileup -> [groupKey(meta, meta.num_intervals), pileup] }.groupTuple(sort: true) + CAT_MPILEUP(mpileup_to_merge) + + // Merge VCF + vcf_to_merge = vcf_mpileup.intervals.map { meta, vcf -> [groupKey(meta, meta.num_intervals), vcf] }.groupTuple() 
+ MERGE_BCFTOOLS_MPILEUP(vcf_to_merge, dict) + + // Mix intervals and no_intervals channels together + mpileup = CAT_MPILEUP.out.file_out + .mix(mpileup_samtools.no_intervals) + .map { meta, mpileup -> [meta - meta.subMap('num_intervals') + [variantcaller: 'samtools'], mpileup] } + vcf = MERGE_BCFTOOLS_MPILEUP.out.vcf + .mix(vcf_mpileup.no_intervals) + .map { meta, vcf -> [meta - meta.subMap('num_intervals') + [variantcaller: 'bcftools'], vcf] } + tbi = MERGE_BCFTOOLS_MPILEUP.out.tbi + .mix(tbi_mpileup.no_intervals) + .map { meta, tbi -> [meta - meta.subMap('num_intervals') + [variantcaller: 'bcftools'], tbi] } + + versions = versions.mix(SAMTOOLS_MPILEUP.out.versions) + versions = versions.mix(BCFTOOLS_MPILEUP.out.versions) + versions = versions.mix(CAT_MPILEUP.out.versions) + versions = versions.mix(MERGE_BCFTOOLS_MPILEUP.out.versions) + + emit: + mpileup + vcf + tbi + versions +} diff --git a/subworkflows/local/bam_variant_calling_sentieon_dnascope/main.nf b/subworkflows/local/bam_variant_calling_sentieon_dnascope/main.nf new file mode 100644 index 0000000000..6085ad0c49 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_sentieon_dnascope/main.nf @@ -0,0 +1,157 @@ +// +// SENTIEON DNASCOPE germline variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { GATK4_MERGEVCFS as MERGE_SENTIEON_DNASCOPE_GVCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_SENTIEON_DNASCOPE_VCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { SENTIEON_DNASCOPE } from '../../../modules/nf-core/sentieon/dnascope/main' + +workflow BAM_VARIANT_CALLING_SENTIEON_DNASCOPE { + take: + cram // channel: [mandatory] [ meta, cram, crai ] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + dict // channel: [mandatory] + dbsnp // channel: [optional] + dbsnp_tbi // channel: [optional] + 
dbsnp_vqsr // channel: [optional] + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + joint_germline // boolean: [mandatory] [default: false] joint calling of germline variants + sentieon_dnascope_emit_mode // string + sentieon_dnascope_pcr_indel_model // string + sentieon_dnascope_model // channel + + main: + versions = Channel.empty() + + gvcf = Channel.empty() + vcf = Channel.empty() + genotype_intervals = Channel.empty() + + // Combine cram and intervals for spread and gather strategy + cram_intervals_for_sentieon = cram.combine(intervals) + // Move num_intervals to meta map + .map{ meta, cram_, crai, intervals_, num_intervals -> [ + meta + [ + num_intervals:num_intervals, + intervals_name:intervals_.simpleName, + variantcaller:'sentieon_dnascope'], + cram_, + crai, + intervals_ + ] + } + + emit_mode_items = sentieon_dnascope_emit_mode.split(',').collect{ it -> it.toLowerCase().trim() } // collect keeps the normalised values; each would return the untouched input + lst = emit_mode_items - 'gvcf' + emit_vcf = lst.size() > 0 ? 
lst[0] : '' + + SENTIEON_DNASCOPE( + cram_intervals_for_sentieon, + fasta, + fasta_fai, + dbsnp.map{it -> [[:], it]}, + dbsnp_tbi.map{it -> [[:], it]}, + sentieon_dnascope_model.map{it -> [[:], it]}, + sentieon_dnascope_pcr_indel_model, + emit_vcf, + emit_mode_items.any{ it.equals('gvcf') }) + + if (joint_germline) { + genotype_intervals = SENTIEON_DNASCOPE.out.gvcf + .join(SENTIEON_DNASCOPE.out.gvcf_tbi, failOnMismatch: true) + .join(cram_intervals_for_sentieon, failOnMismatch: true) + .map{ meta, gvcf_, tbi, cram_, crai, intervals_ -> [ meta, gvcf_, tbi, intervals_ ] } + } + + // Figure out if using intervals or no_intervals + dnascope_vcf_branch = SENTIEON_DNASCOPE.out.vcf.map{ + meta, vcf_ -> [ meta - meta.subMap('interval_name'), vcf_] + } + .branch{ + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + dnascope_vcf_tbi_branch = SENTIEON_DNASCOPE.out.vcf_tbi.map{ + meta, vcf_tbi -> [ meta - meta.subMap('interval_name'), vcf_tbi] + } + .branch{ + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + haplotyper_gvcf_branch = SENTIEON_DNASCOPE.out.gvcf.map{ + meta, gvcf_ -> [ meta - meta.subMap('interval_name'), gvcf_] + } + .branch{ + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + haplotyper_gvcf_tbi_branch = SENTIEON_DNASCOPE.out.gvcf_tbi.map{ + meta, gvcf_tbi -> [ meta - meta.subMap('interval_name'), gvcf_tbi] + } + .branch{ + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + vcfs_for_merging = dnascope_vcf_branch.intervals.map{ + meta, vcf_ -> [ groupKey(meta, meta.num_intervals), vcf_ ]} + + vcfs_for_merging = vcfs_for_merging.map{ + meta, vcf_ -> [ + meta - meta.subMap('intervals_name'), + vcf_]}.groupTuple() + + // VCFs + // Only when using intervals + MERGE_SENTIEON_DNASCOPE_VCFS(vcfs_for_merging, dict) + + dnascope_vcf = Channel.empty().mix( + MERGE_SENTIEON_DNASCOPE_VCFS.out.vcf, + dnascope_vcf_branch.no_intervals) + + 
haplotyper_tbi = Channel.empty().mix( + MERGE_SENTIEON_DNASCOPE_VCFS.out.tbi, + dnascope_vcf_tbi_branch.no_intervals) + + // Remove no longer necessary field: num_intervals + vcf = dnascope_vcf.map{ meta, vcf_ -> [ meta - meta.subMap('num_intervals'), vcf_ ] } + vcf_tbi = haplotyper_tbi.map{ meta, tbi -> [ meta - meta.subMap('num_intervals'), tbi ] } + + // GVFs + // Only when using intervals + gvcfs_for_merging = haplotyper_gvcf_branch.intervals.map{ + meta, vcf_ -> [groupKey(meta, meta.num_intervals), vcf_]} + + gvcfs_for_merging = gvcfs_for_merging.map{ + meta, vcf_ -> [ meta - meta.subMap('intervals_name'), vcf_ ] + }.groupTuple() + + MERGE_SENTIEON_DNASCOPE_GVCFS(gvcfs_for_merging, dict) + + gvcf = Channel.empty().mix( + MERGE_SENTIEON_DNASCOPE_GVCFS.out.vcf, + haplotyper_gvcf_branch.no_intervals) + + gvcf_tbi = Channel.empty().mix( + MERGE_SENTIEON_DNASCOPE_GVCFS.out.tbi, + haplotyper_gvcf_tbi_branch.no_intervals) + + versions = versions.mix(SENTIEON_DNASCOPE.out.versions) + versions = versions.mix(MERGE_SENTIEON_DNASCOPE_VCFS.out.versions) + versions = versions.mix(MERGE_SENTIEON_DNASCOPE_GVCFS.out.versions) + + emit: + versions + vcf + vcf_tbi + gvcf + gvcf_tbi + genotype_intervals // For joint genotyping + +} diff --git a/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf b/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf new file mode 100644 index 0000000000..fe8a33a2e3 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf @@ -0,0 +1,153 @@ +// +// SENTIEON HAPLOTYPER germline variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { GATK4_MERGEVCFS as MERGE_SENTIEON_HAPLOTYPER_GVCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_SENTIEON_HAPLOTYPER_VCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { SENTIEON_HAPLOTYPER } 
from '../../../modules/nf-core/sentieon/haplotyper/main' + +workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER { + take: + cram // channel: [mandatory] [ meta, cram, crai ] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + dict // channel: [mandatory] + dbsnp // channel: [optional] + dbsnp_tbi // channel: [optional] + dbsnp_vqsr // channel: [optional] + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + joint_germline // boolean: [mandatory] [default: false] joint calling of germline variants + sentieon_haplotyper_emit_mode + + main: + versions = Channel.empty() + + gvcf = Channel.empty() + vcf = Channel.empty() + genotype_intervals = Channel.empty() + + // Combine cram and intervals for spread and gather strategy + cram_intervals_for_sentieon = cram.combine(intervals) + // Move num_intervals to meta map + .map{ meta, cram_, crai, intervals_, num_intervals -> [ + meta + [ + num_intervals:num_intervals, + intervals_name:intervals_.baseName, + variantcaller:'sentieon_haplotyper'], + cram_, + crai, + intervals_ + ] + } + + emit_mode_items = sentieon_haplotyper_emit_mode.split(',').collect{ it -> it.toLowerCase().trim() } // collect keeps the normalised values; each would return the untouched input + lst = emit_mode_items - 'gvcf' + emit_vcf = lst.size() > 0 ? 
lst[0] : '' + + SENTIEON_HAPLOTYPER( + cram_intervals_for_sentieon.map{ meta, cram_, crai, intervals_ -> [ meta, cram_, crai, intervals_, [] ]}, + fasta, + fasta_fai, + dbsnp.map{file -> [[id:'dbsnp'], file]}, + dbsnp_tbi.map{file -> [[id:'dbsnp'], file]}, + emit_vcf, + emit_mode_items.any{ it.equals('gvcf') }) + + if (joint_germline) { + genotype_intervals = SENTIEON_HAPLOTYPER.out.gvcf + .join(SENTIEON_HAPLOTYPER.out.gvcf_tbi, failOnMismatch: true) + .join(cram_intervals_for_sentieon, failOnMismatch: true) + .map{ meta, gvcf_, tbi, cram_, crai, intervals_ -> [ meta, gvcf_, tbi, intervals_ ] } + } + + // Figure out if using intervals or no_intervals + haplotyper_vcf_branch = SENTIEON_HAPLOTYPER.out.vcf.map{ + meta, vcf_ -> [ meta - meta.subMap('interval_name'), vcf_] + } + .branch{ + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + haplotyper_vcf_tbi_branch = SENTIEON_HAPLOTYPER.out.vcf_tbi.map{ + meta, vcf_tbi -> [ meta - meta.subMap('interval_name'), vcf_tbi] + } + .branch{ + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + haplotyper_gvcf_branch = SENTIEON_HAPLOTYPER.out.gvcf.map{ + meta, gvcf_ -> [ meta - meta.subMap('interval_name'), gvcf_] + } + .branch{ + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + haplotyper_gvcf_tbi_branch = SENTIEON_HAPLOTYPER.out.gvcf_tbi.map{ + meta, gvcf_tbi -> [ meta - meta.subMap('interval_name'), gvcf_tbi] + } + .branch{ + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + vcfs_for_merging = haplotyper_vcf_branch.intervals.map{ + meta, vcf_ -> [ groupKey(meta, meta.num_intervals), vcf_ ]} + + vcfs_for_merging = vcfs_for_merging.map{ + meta, vcf_ -> [ + meta - meta.subMap('intervals_name'), + vcf_]}.groupTuple() + + // VCFs + // Only when using intervals + MERGE_SENTIEON_HAPLOTYPER_VCFS(vcfs_for_merging, dict) + + haplotyper_vcf = Channel.empty().mix( + MERGE_SENTIEON_HAPLOTYPER_VCFS.out.vcf, 
+ haplotyper_vcf_branch.no_intervals) + + haplotyper_tbi = Channel.empty().mix( + MERGE_SENTIEON_HAPLOTYPER_VCFS.out.tbi, + haplotyper_vcf_tbi_branch.no_intervals) + + // Remove no longer necessary field: num_intervals + vcf = haplotyper_vcf.map{ meta, vcf_ -> [ meta - meta.subMap('num_intervals'), vcf_ ] } + vcf_tbi = haplotyper_tbi.map{ meta, tbi -> [ meta - meta.subMap('num_intervals'), tbi ] } + + // GVFs + // Only when using intervals + gvcfs_for_merging = haplotyper_gvcf_branch.intervals.map{ + meta, vcf_ -> [groupKey(meta, meta.num_intervals), vcf_]} + + gvcfs_for_merging = gvcfs_for_merging.map{ + meta, vcf_ -> [ meta - meta.subMap('intervals_name'), vcf_ ] + }.groupTuple() + + MERGE_SENTIEON_HAPLOTYPER_GVCFS(gvcfs_for_merging, dict) + + gvcf = Channel.empty().mix( + MERGE_SENTIEON_HAPLOTYPER_GVCFS.out.vcf, + haplotyper_gvcf_branch.no_intervals) + + gvcf_tbi = Channel.empty().mix( + MERGE_SENTIEON_HAPLOTYPER_GVCFS.out.tbi, + haplotyper_gvcf_tbi_branch.no_intervals) + + versions = versions.mix(SENTIEON_HAPLOTYPER.out.versions) + versions = versions.mix(MERGE_SENTIEON_HAPLOTYPER_VCFS.out.versions) + versions = versions.mix(MERGE_SENTIEON_HAPLOTYPER_GVCFS.out.versions) + + emit: + versions + vcf + vcf_tbi + gvcf + gvcf_tbi + genotype_intervals // For joint genotyping + +} diff --git a/subworkflows/local/bam_variant_calling_single_strelka/main.nf b/subworkflows/local/bam_variant_calling_single_strelka/main.nf new file mode 100644 index 0000000000..2ba47a8401 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_single_strelka/main.nf @@ -0,0 +1,76 @@ +// +// STRELKA2 single sample variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { GATK4_MERGEVCFS as MERGE_STRELKA } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_STRELKA_GENOME } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { 
STRELKA_GERMLINE as STRELKA_SINGLE } from '../../../modules/nf-core/strelka/germline/main' + +workflow BAM_VARIANT_CALLING_SINGLE_STRELKA { + take: + cram // channel: [mandatory] [ meta, cram, crai ] + dict // channel: [optional] [ meta, dict ] + fasta // channel: [mandatory] [ fasta ] + fasta_fai // channel: [mandatory] [ fasta_fai ] + intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi, num_intervals ] or [ [], [], 0 ] if no intervals + + main: + versions = Channel.empty() + + // Combine cram and intervals for spread and gather strategy + cram_intervals = cram.combine(intervals) + // Move num_intervals to meta map + .map{ meta, cram_, crai, intervals_, intervals_index, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram_, crai, intervals_, intervals_index ] } + + STRELKA_SINGLE(cram_intervals, fasta, fasta_fai) + + // Figuring out if there is one or more vcf(s) from the same sample + genome_vcf = STRELKA_SINGLE.out.genome_vcf.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more vcf(s) from the same sample + vcf_out = STRELKA_SINGLE.out.vcf.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more tbi(s) from the same sample + tbi_out = STRELKA_SINGLE.out.vcf_tbi.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only when using intervals + genome_vcf_to_merge = genome_vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + vcf_to_merge = vcf_out.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + + MERGE_STRELKA(vcf_to_merge, dict) + MERGE_STRELKA_GENOME(genome_vcf_to_merge, dict) + + // Mix 
intervals and no_intervals channels together + // Only strelka variant vcf should get annotated + vcf = Channel.empty().mix(MERGE_STRELKA.out.vcf, vcf_out.no_intervals) + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'strelka' ], vcf ] } + + tbi = Channel.empty().mix(MERGE_STRELKA.out.tbi, tbi_out.no_intervals) + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, tbi -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'strelka' ], tbi ] } + + versions = versions.mix(MERGE_STRELKA.out.versions) + versions = versions.mix(MERGE_STRELKA_GENOME.out.versions) + versions = versions.mix(STRELKA_SINGLE.out.versions) + + emit: + vcf + tbi + + versions +} diff --git a/subworkflows/local/bam_variant_calling_single_tiddit/main.nf b/subworkflows/local/bam_variant_calling_single_tiddit/main.nf new file mode 100644 index 0000000000..ad101d1590 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_single_tiddit/main.nf @@ -0,0 +1,34 @@ +// +// TIDDIT single sample variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } from '../../../modules/nf-core/tabix/bgziptabix/main' +include { TIDDIT_SV } from '../../../modules/nf-core/tiddit/sv/main' + +workflow BAM_VARIANT_CALLING_SINGLE_TIDDIT { + take: + cram + fasta + bwa + + main: + versions = channel.empty() + + TIDDIT_SV(cram, fasta, bwa) + + TABIX_BGZIP_TIDDIT_SV(TIDDIT_SV.out.vcf) + + ploidy = TIDDIT_SV.out.ploidy + vcf = TABIX_BGZIP_TIDDIT_SV.out.gz_index.map { meta, gz, tbi -> [meta + [variantcaller: 'tiddit'], gz] } + tbi = TABIX_BGZIP_TIDDIT_SV.out.gz_index.map { meta, gz, tbi -> [meta + [variantcaller: 'tiddit'], tbi] } + + versions = versions.mix(TIDDIT_SV.out.versions) + + emit: + ploidy + vcf + tbi + 
versions +} diff --git a/subworkflows/local/bam_variant_calling_somatic_all/main.nf b/subworkflows/local/bam_variant_calling_somatic_all/main.nf new file mode 100644 index 0000000000..d6867e8596 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_somatic_all/main.nf @@ -0,0 +1,331 @@ +// +// PAIRED VARIANT CALLING +// + +include { BAM_VARIANT_CALLING_CNVKIT } from '../bam_variant_calling_cnvkit' +include { BAM_VARIANT_CALLING_FREEBAYES } from '../bam_variant_calling_freebayes' +include { BAM_VARIANT_CALLING_INDEXCOV } from '../bam_variant_calling_indexcov' +include { BAM_VARIANT_CALLING_MPILEUP as MPILEUP_NORMAL } from '../bam_variant_calling_mpileup' +include { BAM_VARIANT_CALLING_MPILEUP as MPILEUP_TUMOR } from '../bam_variant_calling_mpileup' +include { BAM_VARIANT_CALLING_SOMATIC_ASCAT } from '../bam_variant_calling_somatic_ascat' +include { BAM_VARIANT_CALLING_SOMATIC_CONTROLFREEC } from '../bam_variant_calling_somatic_controlfreec' +include { BAM_VARIANT_CALLING_SOMATIC_MANTA } from '../bam_variant_calling_somatic_manta' +include { BAM_VARIANT_CALLING_SOMATIC_MUSE } from '../bam_variant_calling_somatic_muse' +include { BAM_VARIANT_CALLING_SOMATIC_MUTECT2 } from '../bam_variant_calling_somatic_mutect2' +include { BAM_VARIANT_CALLING_SOMATIC_STRELKA } from '../bam_variant_calling_somatic_strelka' +include { BAM_VARIANT_CALLING_SOMATIC_TIDDIT } from '../bam_variant_calling_somatic_tiddit' +include { BAM_VARIANT_CALLING_SOMATIC_TNSCOPE } from '../bam_variant_calling_somatic_tnscope' +include { MSISENSOR2_MSI } from '../../../modules/nf-core/msisensor2/msi' +include { MSISENSORPRO_MSISOMATIC } from '../../../modules/nf-core/msisensorpro/msisomatic' + +workflow BAM_VARIANT_CALLING_SOMATIC_ALL { + take: + tools // Mandatory, list of tools to apply + bam // channel: [mandatory] bam + cram // channel: [mandatory] cram + bwa // channel: [optional] bwa + cf_chrom_len // channel: [optional] controlfreec length file + chr_files + dbsnp // channel: [mandatory] dbsnp 
+ dbsnp_tbi // channel: [mandatory] dbsnp_tbi + dict // channel: [mandatory] dict + fasta // channel: [mandatory] fasta + fasta_fai // channel: [mandatory] fasta_fai + germline_resource // channel: [optional] germline_resource + germline_resource_tbi // channel: [optional] germline_resource_tbi + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed + intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped + intervals_bed_gz_tbi_combined // channel: [mandatory] intervals/target regions in one file zipped + mappability + msisensorpro_scan // channel: [optional] msisensorpro_scan + panel_of_normals // channel: [optional] panel_of_normals + panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi + allele_files // channel: [optional] ascat allele files + loci_files // channel: [optional] ascat loci files + gc_file // channel: [optional] ascat gc content file + rt_file // channel: [optional] ascat rt file + joint_mutect2 // boolean: [mandatory] [default: false] run mutect2 in joint mode + wes // boolean: [mandatory] [default: false] whether targeted data is processed + + main: + // channels are often remapped to match module/subworkflow + + // Gather all versions + versions = channel.empty() + + //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config + out_indexcov = channel.empty() + out_msisensorpro = channel.empty() + vcf_freebayes = channel.empty() + vcf_manta = channel.empty() + vcf_muse = channel.empty() + vcf_mutect2 = channel.empty() + vcf_strelka = channel.empty() + vcf_tiddit = channel.empty() + vcf_tnscope = channel.empty() + tbi_freebayes = channel.empty() + tbi_manta = channel.empty() + tbi_muse = channel.empty() + tbi_mutect2 = channel.empty() + tbi_strelka = channel.empty() + tbi_tiddit = channel.empty() + 
tbi_tnscope = channel.empty() + + if (tools && tools.split(',').contains('ascat')) { + BAM_VARIANT_CALLING_SOMATIC_ASCAT( + cram, + allele_files, + loci_files, + (wes ? intervals_bed_combined : []), + fasta.map { _meta, fasta_ -> [fasta_] }, + gc_file, + rt_file, + ) + + versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_ASCAT.out.versions) + } + + // CONTROLFREEC + if (tools && tools.split(',').contains('controlfreec')) { + cram_normal = cram.map { meta, normal_cram, normal_crai, _tumor_cram, _tumor_crai -> [meta, normal_cram, normal_crai] } + cram_tumor = cram.map { meta, _normal_cram, _normal_crai, tumor_cram, tumor_crai -> [meta, tumor_cram, tumor_crai] } + + MPILEUP_NORMAL( + cram_normal, + dict, + fasta, + intervals, + ) + + MPILEUP_TUMOR( + cram_tumor, + dict, + fasta, + intervals, + ) + + mpileup_normal = MPILEUP_NORMAL.out.mpileup + mpileup_tumor = MPILEUP_TUMOR.out.mpileup + mpileup_pair = mpileup_normal.cross(mpileup_tumor).map { normal, tumor -> [normal[0], normal[1], tumor[1], [], [], [], []] } + + BAM_VARIANT_CALLING_SOMATIC_CONTROLFREEC( + mpileup_pair, + fasta.map { _meta, fasta_ -> [fasta_] }, + cf_chrom_len ?: fasta_fai.map { _meta, fasta_fai_ -> [fasta_fai_] }, + dbsnp, + dbsnp_tbi, + chr_files, + mappability, + wes ? intervals_bed_combined : [], + ) + + versions = versions.mix(MPILEUP_NORMAL.out.versions) + versions = versions.mix(MPILEUP_TUMOR.out.versions) + versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_CONTROLFREEC.out.versions) + } + + // CNVKIT + if (tools && tools.split(',').contains('cnvkit')) { + BAM_VARIANT_CALLING_CNVKIT( + bam.map { meta, normal_bam, _normal_bai, tumor_bam, _tumor_bai -> [meta, tumor_bam, normal_bam] }, + fasta, + fasta_fai, + intervals_bed_combined.map { _intervals -> _intervals ? 
[[id: _intervals[0].baseName], _intervals] : [[id: 'no_intervals'], []] }, + [[id: "null"], []], + ) + + versions = versions.mix(BAM_VARIANT_CALLING_CNVKIT.out.versions) + } + + // FREEBAYES + if (tools && tools.split(',').contains('freebayes')) { + BAM_VARIANT_CALLING_FREEBAYES( + cram, + dict, + fasta, + fasta_fai, + intervals, + ) + + vcf_freebayes = BAM_VARIANT_CALLING_FREEBAYES.out.vcf + tbi_freebayes = BAM_VARIANT_CALLING_FREEBAYES.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_FREEBAYES.out.versions) + } + + // MANTA + if (tools && tools.split(',').contains('manta')) { + BAM_VARIANT_CALLING_SOMATIC_MANTA( + cram, + fasta, + fasta_fai, + intervals_bed_gz_tbi_combined, + ) + + vcf_manta = BAM_VARIANT_CALLING_SOMATIC_MANTA.out.vcf + tbi_manta = BAM_VARIANT_CALLING_SOMATIC_MANTA.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_MANTA.out.versions) + } + + + // INDEXCOV + // WGS only; tools is null-checked first, as for every other tool, so an unset tools list cannot throw on split + if (tools && params.wes == false && tools.split(',').contains('indexcov')) { + BAM_VARIANT_CALLING_INDEXCOV( + cram, + fasta, + fasta_fai, + ) + + out_indexcov = BAM_VARIANT_CALLING_INDEXCOV.out.out_indexcov + versions = versions.mix(BAM_VARIANT_CALLING_INDEXCOV.out.versions) + } + + + // STRELKA + if (tools && tools.split(',').contains('strelka')) { + cram_strelka = tools.split(',').contains('manta') + ? 
cram.join(BAM_VARIANT_CALLING_SOMATIC_MANTA.out.candidate_small_indels_vcf, failOnDuplicate: true, failOnMismatch: true).join(BAM_VARIANT_CALLING_SOMATIC_MANTA.out.candidate_small_indels_vcf_tbi, failOnDuplicate: true, failOnMismatch: true) + : cram.map { meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [meta, normal_cram, normal_crai, tumor_cram, tumor_crai, [], []] } + + BAM_VARIANT_CALLING_SOMATIC_STRELKA( + cram_strelka, + dict, + fasta.map { _meta, fasta_ -> [fasta_] }, + fasta_fai.map { _meta, fasta_fai_ -> [fasta_fai_] }, + intervals_bed_gz_tbi, + ) + + vcf_strelka = BAM_VARIANT_CALLING_SOMATIC_STRELKA.out.vcf + tbi_strelka = BAM_VARIANT_CALLING_SOMATIC_STRELKA.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_STRELKA.out.versions) + } + + // MSISENSORPRO + if (tools && tools.split(',').contains('msisensorpro')) { + MSISENSORPRO_MSISOMATIC(cram.combine(intervals_bed_combined), fasta, msisensorpro_scan) + + versions = versions.mix(MSISENSORPRO_MSISOMATIC.out.versions) + out_msisensorpro = out_msisensorpro.mix(MSISENSORPRO_MSISOMATIC.out.output_report) + } + + // MuSE + if (tools && tools.split(',').contains('muse')) { + BAM_VARIANT_CALLING_SOMATIC_MUSE( + bam.map { meta, normal_bam, normal_bai, _tumor_bam, _tumor_bai -> [meta, normal_bam, normal_bai] }, + bam.map { meta, _normal_bam, _normal_bai, tumor_bam, tumor_bai -> [meta, tumor_bam, tumor_bai] }, + fasta, + dbsnp, + dbsnp_tbi, + ) + + vcf_muse = BAM_VARIANT_CALLING_SOMATIC_MUSE.out.vcf + tbi_muse = BAM_VARIANT_CALLING_SOMATIC_MUSE.out.tbi + } + + // MUTECT2 + if (tools && tools.split(',').contains('mutect2')) { + // joint_mutect2 mode needs different meta.map than regular mode + // we need to keep all fields and then remove on a per-tool-basis to ensure proper joining at the filtering step + BAM_VARIANT_CALLING_SOMATIC_MUTECT2( + cram.map { meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> + joint_mutect2 + ? 
[meta + [id: meta.patient], [normal_cram, tumor_cram], [normal_crai, tumor_crai]] + : [meta, [normal_cram, tumor_cram], [normal_crai, tumor_crai]] + }, + fasta, + fasta_fai, + dict, + germline_resource, + germline_resource_tbi, + panel_of_normals, + panel_of_normals_tbi, + intervals, + joint_mutect2, + ) + + // vcf_mutect2 and tbi_mutect2 always contain usable output (filtered if available, otherwise unfiltered) + vcf_mutect2 = BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.vcf + tbi_mutect2 = BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_MUTECT2.out.versions) + } + + // TNSCOPE + if (tools && tools.split(',').contains('sentieon_tnscope')) { + + BAM_VARIANT_CALLING_SOMATIC_TNSCOPE( + cram.map { meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> + [meta, [normal_cram, tumor_cram], [normal_crai, tumor_crai]] + }, + fasta, + fasta_fai, + dict, + germline_resource, + germline_resource_tbi, + panel_of_normals, + panel_of_normals_tbi, + intervals, + ) + + vcf_tnscope = BAM_VARIANT_CALLING_SOMATIC_TNSCOPE.out.vcf + tbi_tnscope = BAM_VARIANT_CALLING_SOMATIC_TNSCOPE.out.index + versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_TNSCOPE.out.versions) + } + + // TIDDIT + if (tools && tools.split(',').contains('tiddit')) { + BAM_VARIANT_CALLING_SOMATIC_TIDDIT( + cram.map { meta, normal_cram, normal_crai, _tumor_cram, _tumor_crai -> [meta, normal_cram, normal_crai] }, + cram.map { meta, _normal_cram, _normal_crai, tumor_cram, tumor_crai -> [meta, tumor_cram, tumor_crai] }, + fasta, + bwa, + ) + + vcf_tiddit = BAM_VARIANT_CALLING_SOMATIC_TIDDIT.out.vcf + tbi_tiddit = BAM_VARIANT_CALLING_SOMATIC_TIDDIT.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_TIDDIT.out.versions) + } + + vcf_all = channel.empty() + .mix( + vcf_freebayes, + vcf_manta, + vcf_muse, + vcf_mutect2, + vcf_strelka, + vcf_tiddit, + vcf_tnscope, + ) + + tbi_all = channel.empty() + .mix( + tbi_freebayes, + tbi_manta, + tbi_muse, + tbi_mutect2, + 
tbi_strelka, + tbi_tiddit, + tbi_tnscope, + ) + + emit: + out_indexcov + out_msisensorpro + vcf_all + vcf_freebayes + vcf_manta + vcf_muse + vcf_mutect2 + vcf_strelka + vcf_tiddit + vcf_tnscope + tbi_all + tbi_freebayes + tbi_manta + tbi_muse + tbi_mutect2 + tbi_strelka + tbi_tiddit + tbi_tnscope + versions +} diff --git a/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf b/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf new file mode 100644 index 0000000000..22802cfb58 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf @@ -0,0 +1,30 @@ +// +// ASCAT variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { ASCAT } from '../../../modules/nf-core/ascat/main' + +workflow BAM_VARIANT_CALLING_SOMATIC_ASCAT { + + take: + cram_pair // channel: [mandatory] [meta, normal_cram, normal_crai, tumor_cram, tumor_crai] + allele_files // channel: [mandatory] zip + loci_files // channel: [mandatory] zip + intervals_bed // channel: [optional] bed for WES + fasta // channel: [optional] fasta needed for cram + gc_file // channel: [optional] txt for LogRCorrection + rt_file // channel: [optional] txt for LogRCorrection + + main: + + ch_versions = Channel.empty() + + ASCAT(cram_pair, allele_files, loci_files, intervals_bed, fasta, gc_file, rt_file) + + ch_versions = ch_versions.mix(ASCAT.out.versions) + + emit: + versions = ch_versions +} diff --git a/subworkflows/local/bam_variant_calling_somatic_controlfreec/main.nf b/subworkflows/local/bam_variant_calling_somatic_controlfreec/main.nf new file mode 100644 index 0000000000..3be4da8e45 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_somatic_controlfreec/main.nf @@ -0,0 +1,66 @@ +// +// CONTROLFREEC somatic variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include 
{ CONTROLFREEC_FREEC as FREEC_SOMATIC } from '../../../modules/nf-core/controlfreec/freec/main' +include { CONTROLFREEC_ASSESSSIGNIFICANCE as ASSESS_SIGNIFICANCE } from '../../../modules/nf-core/controlfreec/assesssignificance/main' +include { CONTROLFREEC_FREEC2BED as FREEC2BED } from '../../../modules/nf-core/controlfreec/freec2bed/main' +include { CONTROLFREEC_FREEC2CIRCOS as FREEC2CIRCOS } from '../../../modules/nf-core/controlfreec/freec2circos/main' +include { CONTROLFREEC_MAKEGRAPH2 as MAKEGRAPH2 } from '../../../modules/nf-core/controlfreec/makegraph2/main' + +workflow BAM_VARIANT_CALLING_SOMATIC_CONTROLFREEC { + take: + controlfreec_input // channel: [mandatory] [meta, pileup_normal, pileup_tumor, [], [], [], []] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + dbsnp // channel: [mandatory] + dbsnp_tbi // channel: [mandatory] + chr_files // channel: [mandatory] + mappability // channel: [mandatory] + intervals_bed // channel: [optional] Contains a bed file of all intervals combined provided with the cram input(s). Should be empty for WGS + + main: + + ch_versions = Channel.empty() + + FREEC_SOMATIC(controlfreec_input, fasta, fasta_fai, [], dbsnp, dbsnp_tbi, chr_files, mappability, intervals_bed, []) + + //Filter the files that come out of freec somatic as ASSESS_SIGNIFICANCE only takes one cnv and one ratio file + //Fails with an error if the tumor file is missing + cnv_files = FREEC_SOMATIC.out.CNV + .map{ meta, cnv -> + def tumor_file = cnv instanceof List ? cnv.find { it.toString().endsWith("gz_CNVs") } : cnv //only find if it's a list, else it returns only the filename without the path + if (!tumor_file){ + error("CNVs tumor file not found for sample $meta.id") + } + [meta,tumor_file] + } + + ratio_files = FREEC_SOMATIC.out.ratio + .map{ meta, ratio -> + def tumor_file = ratio instanceof List ?
ratio.find { it.toString().endsWith("gz_ratio.txt") } : ratio //same here as cnv + if (!tumor_file){ + error("Ratio tumor file not found for sample $meta.id") + } + [meta,tumor_file] + } + + //Join the pairs + assess_significance_input = cnv_files.join(ratio_files, failOnDuplicate: true, failOnMismatch: true) + + ASSESS_SIGNIFICANCE(assess_significance_input) + FREEC2BED(FREEC_SOMATIC.out.ratio) + FREEC2CIRCOS(FREEC_SOMATIC.out.ratio) + MAKEGRAPH2(FREEC_SOMATIC.out.ratio.join(FREEC_SOMATIC.out.BAF, failOnDuplicate: true, failOnMismatch: true)) + + ch_versions = ch_versions.mix(FREEC_SOMATIC.out.versions) + ch_versions = ch_versions.mix(ASSESS_SIGNIFICANCE.out.versions) + ch_versions = ch_versions.mix(FREEC2BED.out.versions) + ch_versions = ch_versions.mix(FREEC2CIRCOS.out.versions) + ch_versions = ch_versions.mix(MAKEGRAPH2.out.versions) + + emit: + versions = ch_versions +} diff --git a/subworkflows/local/bam_variant_calling_somatic_manta/main.nf b/subworkflows/local/bam_variant_calling_somatic_manta/main.nf new file mode 100644 index 0000000000..4d1edfaf59 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_somatic_manta/main.nf @@ -0,0 +1,51 @@ +// +// MANTA somatic variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { MANTA_SOMATIC } from '../../../modules/nf-core/manta/somatic/main' + +workflow BAM_VARIANT_CALLING_SOMATIC_MANTA { + take: + cram // channel: [mandatory] [ meta, cram1, crai1, cram2, crai2 ] + fasta // channel: [mandatory] [ meta, fasta ] + fasta_fai // channel: [mandatory] [ meta, fasta_fai ] + intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi ] or [ [], [] ] if no intervals + + main: + versions = Channel.empty() + + // Combine cram and intervals, account for 0 intervals + cram_intervals = cram.combine(intervals).map{ it -> + def bed_gz = it.size() > 5 ? it[5] : [] + def bed_tbi = it.size() > 5 ?
it[6] : [] + + [it[0], it[1], it[2], it[3], it[4], bed_gz, bed_tbi] + } + + MANTA_SOMATIC(cram_intervals, fasta, fasta_fai, []) + + candidate_small_indels_vcf = MANTA_SOMATIC.out.candidate_small_indels_vcf + candidate_small_indels_vcf_tbi = MANTA_SOMATIC.out.candidate_small_indels_vcf_tbi + candidate_sv_vcf = MANTA_SOMATIC.out.candidate_sv_vcf + diploid_sv_vcf = MANTA_SOMATIC.out.diploid_sv_vcf + diploid_sv_vcf_tbi = MANTA_SOMATIC.out.diploid_sv_vcf_tbi + somatic_sv_vcf = MANTA_SOMATIC.out.somatic_sv_vcf + somatic_sv_vcf_tbi = MANTA_SOMATIC.out.somatic_sv_vcf_tbi + + // Only diploid and somatic SV should get annotated + // add variantcaller to meta map + vcf = Channel.empty().mix(diploid_sv_vcf, somatic_sv_vcf).map{ meta, vcf -> [ meta + [ variantcaller:'manta' ], vcf ] } + tbi = Channel.empty().mix(diploid_sv_vcf_tbi, somatic_sv_vcf_tbi).map{ meta, tbi -> [ meta + [ variantcaller:'manta' ], tbi ] } + + versions = versions.mix(MANTA_SOMATIC.out.versions) + + emit: + candidate_small_indels_vcf + candidate_small_indels_vcf_tbi + vcf + tbi + + versions +} diff --git a/subworkflows/local/bam_variant_calling_somatic_muse/main.nf b/subworkflows/local/bam_variant_calling_somatic_muse/main.nf new file mode 100644 index 0000000000..58a98c021e --- /dev/null +++ b/subworkflows/local/bam_variant_calling_somatic_muse/main.nf @@ -0,0 +1,33 @@ +// +// MuSE tumor-normal variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { MUSE_CALL } from '../../../modules/nf-core/muse/call' +include { MUSE_SUMP } from '../../../modules/nf-core/muse/sump' + +workflow BAM_VARIANT_CALLING_SOMATIC_MUSE { + take: + bam_normal // channel: [mandatory] [ meta, normal_bam, normal_bai] + bam_tumor // channel: [mandatory] [ meta, tumor_bam, tumor_bai] + fasta // channel: [mandatory] [ meta, fasta ] + dbsnp // channel: [mandatory] [ dbsnp ] + dbsnp_tbi // channel: [mandatory] [ dbsnp_tbi ] + +
main: + // Combine normal and tumor data + ch_bam = bam_tumor.join(bam_normal, by: [0]) + + MUSE_CALL( ch_bam.combine(fasta.map { _meta, fasta_file -> fasta_file }) ) + + MUSE_SUMP( MUSE_CALL.out.txt.combine(dbsnp.combine(dbsnp_tbi)) ) + + // add variantcaller to meta map + vcf = MUSE_SUMP.out.vcf.map { meta, vcf -> [meta + [variantcaller: 'muse'], vcf] } + tbi = MUSE_SUMP.out.tbi.map { meta, tbi -> [meta + [variantcaller: 'muse'], tbi] } + + emit: + vcf + tbi +} diff --git a/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf new file mode 100644 index 0000000000..c6e0873855 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf @@ -0,0 +1,252 @@ +// +// +// MUTECT2: tumor-normal mode variantcalling: getpileupsummaries, calculatecontamination, learnreadorientationmodel and filtermutectcalls +// + +include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../modules/nf-core/gatk4/calculatecontamination' +include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../modules/nf-core/gatk4/filtermutectcalls' +include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES_NORMAL } from '../../../modules/nf-core/gatk4/gatherpileupsummaries' +include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES_TUMOR } from '../../../modules/nf-core/gatk4/gatherpileupsummaries' +include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_NORMAL } from '../../../modules/nf-core/gatk4/getpileupsummaries' +include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_TUMOR } from '../../../modules/nf-core/gatk4/getpileupsummaries' +include { GATK4_LEARNREADORIENTATIONMODEL as LEARNREADORIENTATIONMODEL } from '../../../modules/nf-core/gatk4/learnreadorientationmodel' +include { GATK4_MERGEMUTECTSTATS as MERGEMUTECTSTATS } from '../../../modules/nf-core/gatk4/mergemutectstats' +include { GATK4_MERGEVCFS as MERGE_MUTECT2 } from
'../../../modules/nf-core/gatk4/mergevcfs' +include { GATK4_MUTECT2 as MUTECT2_PAIRED } from '../../../modules/nf-core/gatk4/mutect2' + +workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 { + take: + input // channel: [ meta, [ input ], [ input_index ] ] + fasta // channel: /path/to/reference/fasta + fai // channel: /path/to/reference/fasta/index + dict // channel: /path/to/reference/fasta/dictionary + germline_resource // channel: /path/to/germline/resource + germline_resource_tbi // channel: /path/to/germline/index + panel_of_normals // channel: /path/to/panel/of/normals + panel_of_normals_tbi // channel: /path/to/panel/of/normals/index + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + joint_mutect2 // boolean: [mandatory] [default: false] run mutect2 in joint mode + + main: + versions = Channel.empty() + + // If no germline resource is provided, then create an empty channel to avoid GetPileupsummaries from being run + // Handle Channel.value([]) input from prepare_genome by converting to proper empty channel + germline_resource_pileup = germline_resource.filter { it != [] } + germline_resource_pileup_tbi = germline_resource_tbi.filter { it != [] } + + // Combine input and intervals for spread and gather strategy + // Move num_intervals to meta map and reorganize channel for MUTECT2_PAIRED module + input_intervals = input + .combine(intervals) + .map { meta, input_list, input_index_list, intervals_, num_intervals -> [meta + [num_intervals: num_intervals], input_list, input_index_list, intervals_] } + + if (joint_mutect2) { + + // Separate normal cram files + // Extract tumor cram files + ch_cram = input.multiMap { meta, cram, crai -> + normal: [meta - meta.subMap('tumor_id'), cram[0], crai[0]] + tumor: [meta - meta.subMap('tumor_id'), cram[1], crai[1]] + } + + // Remove duplicates from normal channel and merge normal and tumor crams by patient + ch_tn_cram = ch_cram.normal.unique().mix(ch_cram.tumor).groupTuple() + //
Combine input and intervals for scatter and gather strategy + // Move num_intervals to meta map and reorganize channel for MUTECT2_PAIRED module + ch_tn_intervals = ch_tn_cram + .combine(intervals) + .map { meta, cram, crai, intervals_, num_intervals -> [meta + [num_intervals: num_intervals], cram, crai, intervals_] } + + MUTECT2_PAIRED(ch_tn_intervals, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi) + } + else { + + // Perform variant calling using mutect2 module pair mode + // meta: [id:tumor_id_vs_normal_id, normal_id, num_intervals, patient, sex, tumor_id] + MUTECT2_PAIRED(input_intervals, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi) + } + + // Figuring out if there is one or more vcf(s) from the same sample + vcf_branch = MUTECT2_PAIRED.out.vcf.branch { + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more tbi(s) from the same sample + tbi_branch = MUTECT2_PAIRED.out.tbi.branch { + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more stats file(s) from the same sample + stats_branch = MUTECT2_PAIRED.out.stats.branch { + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more f1r2 file(s) from the same sample + f1r2_branch = MUTECT2_PAIRED.out.f1r2.branch { + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only when using intervals + vcf_to_merge = vcf_branch.intervals.map { meta, vcf -> [groupKey(meta, meta.num_intervals), vcf] }.groupTuple() + stats_to_merge = stats_branch.intervals.map { meta, stats -> [groupKey(meta, meta.num_intervals), stats] }.groupTuple() + f1r2_to_merge = f1r2_branch.intervals.map { meta, f1r2 -> [groupKey(meta, meta.num_intervals), f1r2] }.groupTuple() + + MERGE_MUTECT2(vcf_to_merge, dict) +
MERGEMUTECTSTATS(stats_to_merge) + + // Mix intervals and no_intervals channels together and remove no longer necessary fields: num_intervals (plus normal_id in joint mode) + vcf = Channel.empty() + .mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals) + .map { meta, vcf -> + [joint_mutect2 ? meta - meta.subMap('normal_id', 'num_intervals') : meta - meta.subMap('num_intervals'), vcf] + } + tbi = Channel.empty() + .mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals) + .map { meta, tbi -> + [joint_mutect2 ? meta - meta.subMap('normal_id', 'num_intervals') : meta - meta.subMap('num_intervals'), tbi] + } + stats = Channel.empty() + .mix(MERGEMUTECTSTATS.out.stats, stats_branch.no_intervals) + .map { meta, stats -> + [joint_mutect2 ? meta - meta.subMap('normal_id', 'num_intervals') : meta - meta.subMap('num_intervals'), stats] + } + f1r2 = Channel.empty() + .mix(f1r2_to_merge, f1r2_branch.no_intervals) + .map { meta, f1r2 -> + [joint_mutect2 ? meta - meta.subMap('normal_id', 'num_intervals') : meta - meta.subMap('num_intervals'), f1r2] + } + + // Generate artifactpriors using learnreadorientationmodel on the f1r2 output of mutect2 + LEARNREADORIENTATIONMODEL(f1r2) + + pileup = input_intervals.multiMap { meta, input_list, input_index_list, intervals_ -> + tumor: [meta, input_list[1], input_index_list[1], intervals_] + normal: [meta, input_list[0], input_index_list[0], intervals_] + } + + // Prepare input channel for normal pileup summaries. + // Remember, the input channel contains tumor-normal pairs, so there will be multiple copies of the normal sample for each tumor for a given patient. + // Therefore, we use unique function to generate normal pileup summaries once for each patient for better efficiency. + pileup_normal = pileup.normal.map { meta, cram, crai, intervals_ -> [meta - meta.subMap('tumor_id') + [id: meta.normal_id], cram, crai, intervals_] }.unique() + // Prepare input channel for tumor pileup summaries.
+ pileup_tumor = pileup.tumor.map { meta, cram, crai, intervals_ -> [meta - meta.subMap('normal_id') + [id: meta.tumor_id], cram, crai, intervals_] } + + // Generate pileup summary tables using getpileupsummaries. tumor sample should always be passed in as the first input and input list entries of vcf_to_filter, + GETPILEUPSUMMARIES_NORMAL(pileup_normal, fasta, fai, dict, germline_resource_pileup, germline_resource_pileup_tbi) + GETPILEUPSUMMARIES_TUMOR(pileup_tumor, fasta, fai, dict, germline_resource_pileup, germline_resource_pileup_tbi) + + // Figuring out if there is one or more table(s) from the same sample + pileup_table_normal_branch = GETPILEUPSUMMARIES_NORMAL.out.table.branch { + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more table(s) from the same sample + pileup_table_tumor_branch = GETPILEUPSUMMARIES_TUMOR.out.table.branch { + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only when using intervals + pileup_table_normal_to_merge = pileup_table_normal_branch.intervals.map { meta, table -> [groupKey(meta, meta.num_intervals), table] }.groupTuple() + pileup_table_tumor_to_merge = pileup_table_tumor_branch.intervals.map { meta, table -> [groupKey(meta, meta.num_intervals), table] }.groupTuple() + + // Merge Pileup Summaries + GATHERPILEUPSUMMARIES_NORMAL(pileup_table_normal_to_merge, dict.map { _meta, dict_ -> [dict_] }) + GATHERPILEUPSUMMARIES_TUMOR(pileup_table_tumor_to_merge, dict.map { _meta, dict_ -> [dict_] }) + + // Do some channel magic to generate tumor-normal pairs again. + // This is necessary because we generated one normal pileup summary for each patient but we need to run calculate contamination for each tumor-normal pair.
+ pileup_table_tumor = Channel.empty().mix(GATHERPILEUPSUMMARIES_TUMOR.out.table, pileup_table_tumor_branch.no_intervals).map { meta, table -> [meta - meta.subMap('normal_id', 'tumor_id', 'num_intervals') + [id: meta.patient], meta.id, table] } + pileup_table_normal = Channel.empty().mix(GATHERPILEUPSUMMARIES_NORMAL.out.table, pileup_table_normal_branch.no_intervals).map { meta, table -> [meta - meta.subMap('normal_id', 'tumor_id', 'num_intervals') + [id: meta.patient], meta.id, table] } + + ch_calculatecontamination_in_tables = pileup_table_tumor + .combine( + pileup_table_normal, + by: 0 + ) + .map { meta, tumor_id, tumor_table, normal_id, normal_table -> + if (joint_mutect2) { + [meta + [id: tumor_id + "_vs_" + normal_id], tumor_table, normal_table] + } + else { + // we need tumor and normal ID for further post processing + [meta + [id: tumor_id + "_vs_" + normal_id, normal_id: normal_id, tumor_id: tumor_id], tumor_table, normal_table] + } + } + + CALCULATECONTAMINATION(ch_calculatecontamination_in_tables) + + // Initialize empty channel: Contamination calculation is run on pileup table, pileup is not run if germline resource is not provided + calculatecontamination_out_seg = Channel.empty() + calculatecontamination_out_cont = Channel.empty() + + if (joint_mutect2) { + // Reduce the meta to only patient name + calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map { meta, seg -> [meta + [id: meta.patient], seg] }.groupTuple() + calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination.map { meta, cont -> [meta + [id: meta.patient], cont] }.groupTuple() + } + else { + // Keep tumor_vs_normal ID + calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation + calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination + } + + // Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables + // meta joint calling: [id:patient_id, patient, sex] + // meta
paired calling: [id:tumorID_vs_normalID, normal_ID, patient, sex, tumorID] + vcf_to_filter = vcf + .join(tbi, failOnDuplicate: true, failOnMismatch: true) + .join(stats, failOnDuplicate: true, failOnMismatch: true) + .join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true) + .join(calculatecontamination_out_seg) + .join(calculatecontamination_out_cont) + .map { meta, vcf_, tbi_, stats_, orientation, seg, cont -> [meta, vcf_, tbi_, stats_, orientation, seg, cont, []] } + + FILTERMUTECTCALLS(vcf_to_filter, fasta, fai, dict) + + // Handle filtered vs unfiltered output + // vcf_mutect2 and tbi_mutect2 should always contain usable output: + // - If filtering happened (germline_resource provided): use filtered results + // - If filtering didn't happen: use unfiltered results with variantcaller metadata + // This ensures downstream processes always have mutect2 calls available for consensus calling + // Using concat() + unique() ensures filtered output takes precedence deterministically + // concat() preserves order (filtered first), unique() keeps first occurrence of each meta key + vcf_mutect2 = FILTERMUTECTCALLS.out.vcf + .map { meta, vcf_ -> [meta - meta.subMap('num_intervals') + [variantcaller: 'mutect2'], vcf_] } + .concat(vcf.map { meta, vcf_ -> [meta - meta.subMap('num_intervals') + [variantcaller: 'mutect2'], vcf_] }) + .unique { it[0] } + + tbi_mutect2 = FILTERMUTECTCALLS.out.tbi + .map { meta, tbi_ -> [meta - meta.subMap('num_intervals') + [variantcaller: 'mutect2'], tbi_] } + .concat(tbi.map { meta, tbi_ -> [meta - meta.subMap('num_intervals') + [variantcaller: 'mutect2'], tbi_] }) + .unique { it[0] } + + versions = versions.mix(MERGE_MUTECT2.out.versions) + versions = versions.mix(CALCULATECONTAMINATION.out.versions) + versions = versions.mix(FILTERMUTECTCALLS.out.versions) + versions = versions.mix(GETPILEUPSUMMARIES_NORMAL.out.versions) + versions = versions.mix(GETPILEUPSUMMARIES_TUMOR.out.versions) + versions =
versions.mix(GATHERPILEUPSUMMARIES_NORMAL.out.versions) + versions = versions.mix(GATHERPILEUPSUMMARIES_TUMOR.out.versions) + versions = versions.mix(LEARNREADORIENTATIONMODEL.out.versions) + versions = versions.mix(MERGEMUTECTSTATS.out.versions) + versions = versions.mix(MUTECT2_PAIRED.out.versions) + + emit: + vcf = vcf_mutect2 // channel: [ meta, vcf ] - filtered if germline_resource provided, otherwise unfiltered + tbi = tbi_mutect2 // channel: [ meta, tbi ] - filtered if germline_resource provided, otherwise unfiltered + + stats_filtered = FILTERMUTECTCALLS.out.stats // channel: [ meta, stats ] + artifact_priors = LEARNREADORIENTATIONMODEL.out.artifactprior // channel: [ meta, artifactprior ] + pileup_table_normal // channel: [ meta, table_normal ] + pileup_table_tumor // channel: [ meta, table_tumor ] + contamination_table = calculatecontamination_out_cont // channel: [ meta, contamination ] + segmentation_table = calculatecontamination_out_seg // channel: [ meta, segmentation ] + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/meta.yml b/subworkflows/local/bam_variant_calling_somatic_mutect2/meta.yml similarity index 98% rename from subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/meta.yml rename to subworkflows/local/bam_variant_calling_somatic_mutect2/meta.yml index 4c42addfa4..d5abdca939 100644 --- a/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/meta.yml +++ b/subworkflows/local/bam_variant_calling_somatic_mutect2/meta.yml @@ -74,7 +74,7 @@ output: - versions: type: file description: File containing software versions - pattern: 'versions.yml' + pattern: "versions.yml" - mutect2_vcf: type: file description: Compressed vcf file to be used for variant_calling. @@ -124,4 +124,4 @@ output: description: file containing statistics of the filtermutectcalls run.
pattern: "*.filteringStats.tsv" authors: - - '@GCJMackenzie' + - "@GCJMackenzie" diff --git a/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf b/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf new file mode 100644 index 0000000000..63bc89f461 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf @@ -0,0 +1,82 @@ +// +// STRELKA2 tumor-normal variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { GATK4_MERGEVCFS as MERGE_STRELKA_INDELS } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_STRELKA_SNVS } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { STRELKA_SOMATIC } from '../../../modules/nf-core/strelka/somatic/main' + +workflow BAM_VARIANT_CALLING_SOMATIC_STRELKA { + take: + cram // channel: [mandatory] [ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi ] manta* are optional + dict // channel: [optional] [ meta, dict ] + fasta // channel: [mandatory] [ fasta ] + fasta_fai // channel: [mandatory] [ fasta_fai ] + intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi, num_intervals ] or [ [], [], 0 ] if no intervals + + main: + versions = Channel.empty() + + // Combine cram and intervals for spread and gather strategy + cram_intervals = cram.combine(intervals) + // Move num_intervals to meta map + .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, intervals_, intervals_index, num_intervals -> [ meta + [ num_intervals:num_intervals ], normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, intervals_, intervals_index ] } + + STRELKA_SOMATIC(cram_intervals, fasta, fasta_fai ) + + // Figuring out if there is one or more vcf(s) from the same sample + vcf_indels = STRELKA_SOMATIC.out.vcf_indels.branch{ + // Use meta.num_intervals to assess number of intervals
+ intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more vcf(s) from the same sample + vcf_snvs = STRELKA_SOMATIC.out.vcf_snvs.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only when using intervals + vcf_indels_to_merge = vcf_indels.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + vcf_snvs_to_merge = vcf_snvs.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + + MERGE_STRELKA_INDELS(vcf_indels_to_merge, dict) + MERGE_STRELKA_SNVS(vcf_snvs_to_merge, dict) + + // Figuring out if there is one or more tbi(s) from the same sample + tbi_indels = STRELKA_SOMATIC.out.vcf_indels_tbi.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more tbi(s) from the same sample + tbi_snvs = STRELKA_SOMATIC.out.vcf_snvs_tbi.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Mix intervals and no_intervals channels together + vcf = Channel.empty().mix(MERGE_STRELKA_INDELS.out.vcf, MERGE_STRELKA_SNVS.out.vcf, vcf_indels.no_intervals, vcf_snvs.no_intervals) + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'strelka' ], vcf ] } + + tbi = Channel.empty().mix(MERGE_STRELKA_INDELS.out.tbi, MERGE_STRELKA_SNVS.out.tbi, tbi_indels.no_intervals, tbi_snvs.no_intervals) + // add variantcaller to meta map and remove no longer necessary field: num_intervals + .map{ meta, tbi -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'strelka' ], tbi ] } + + versions =
versions.mix(MERGE_STRELKA_SNVS.out.versions) + versions = versions.mix(MERGE_STRELKA_INDELS.out.versions) + versions = versions.mix(STRELKA_SOMATIC.out.versions) + + emit: + vcf + tbi + + versions +} diff --git a/subworkflows/local/bam_variant_calling_somatic_tiddit/main.nf b/subworkflows/local/bam_variant_calling_somatic_tiddit/main.nf new file mode 100644 index 0000000000..f768c42791 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_somatic_tiddit/main.nf @@ -0,0 +1,38 @@ +// +// TIDDIT tumor-normal variant calling (normal and tumor are called separately, then merged with SVDB) +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { BAM_VARIANT_CALLING_SINGLE_TIDDIT as TIDDIT_NORMAL } from '../bam_variant_calling_single_tiddit/main.nf' +include { BAM_VARIANT_CALLING_SINGLE_TIDDIT as TIDDIT_TUMOR } from '../bam_variant_calling_single_tiddit/main.nf' +include { SVDB_MERGE } from '../../../modules/nf-core/svdb/merge/main.nf' + +workflow BAM_VARIANT_CALLING_SOMATIC_TIDDIT { + take: + cram_normal + cram_tumor + fasta + bwa + + main: + + versions = Channel.empty() + + TIDDIT_NORMAL(cram_normal, fasta, bwa) + TIDDIT_TUMOR(cram_tumor, fasta, bwa) + + SVDB_MERGE(TIDDIT_NORMAL.out.vcf.join(TIDDIT_TUMOR.out.vcf, failOnDuplicate: true, failOnMismatch: true).map{ meta, vcf_normal, vcf_tumor -> [ meta, [vcf_normal, vcf_tumor] ] }, false, true) + + vcf = SVDB_MERGE.out.vcf.map{ meta, vcf -> [ meta + [ variantcaller:'tiddit' ], vcf ] } + tbi = SVDB_MERGE.out.tbi.map{ meta, tbi -> [ meta + [ variantcaller:'tiddit' ], tbi ] } + + versions = versions.mix(TIDDIT_NORMAL.out.versions) + versions = versions.mix(TIDDIT_TUMOR.out.versions) + versions = versions.mix(SVDB_MERGE.out.versions) + + emit: + versions + vcf + tbi +} diff --git a/subworkflows/local/bam_variant_calling_somatic_tnscope/main.nf b/subworkflows/local/bam_variant_calling_somatic_tnscope/main.nf new file mode 100644 index 0000000000..dae2504f7f --- /dev/null +++
b/subworkflows/local/bam_variant_calling_somatic_tnscope/main.nf @@ -0,0 +1,75 @@ +// +// +// SENTIEON TNSCOPE: tumor-normal mode variantcalling +// + +include { SENTIEON_TNSCOPE } from '../../../modules/nf-core/sentieon/tnscope/main' +include { GATK4_MERGEVCFS as MERGE_TNSCOPE } from '../../../modules/nf-core/gatk4/mergevcfs/main' + +workflow BAM_VARIANT_CALLING_SOMATIC_TNSCOPE { + take: + input // channel: [ meta, [ input ], [ input_index ] ] + fasta // channel: [ meta, /path/to/reference/fasta ] + fai // channel: [ meta, /path/to/reference/fasta/index ] + dict // channel: [ meta, /path/to/reference/dict ] + germline_resource // channel: /path/to/germline/resource + germline_resource_tbi // channel: /path/to/germline/index + panel_of_normals // channel: /path/to/panel/of/normals + panel_of_normals_tbi // channel: /path/to/panel/of/normals/index + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + + main: + versions = Channel.empty() + + // Combine input and intervals for spread and gather strategy + input_intervals = input.combine(intervals) + // Move num_intervals to meta map and reorganize channel for TNSCOPE module + .map{ meta, input_, index, intervals_, num_intervals -> [ meta + [ num_intervals:num_intervals ], input_, index, intervals_ ] } + + SENTIEON_TNSCOPE( + input_intervals, + fasta, + fai, + germline_resource.map{resource -> [[id: "resource"], resource]}, + germline_resource_tbi.map{index -> [[id: "resource"], index]}, + panel_of_normals.map{pon -> [[id: "pon"], pon]}, + panel_of_normals_tbi.map{index -> [[id: "pon"], index]}, + [[],[]], // cosmic + [[],[]] // cosmic_tbi + ) + versions = versions.mix(SENTIEON_TNSCOPE.out.versions) + + // Figuring out if there is one or more vcf(s) from the same sample + vcf_branch = SENTIEON_TNSCOPE.out.vcf.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + // Figuring out if
there is one or more tbi(s) from the same sample + tbi_branch = SENTIEON_TNSCOPE.out.index.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + vcf_to_merge = vcf_branch.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ] }.groupTuple() + + // Merge if required + MERGE_TNSCOPE(vcf_to_merge, dict) + versions = versions.mix(MERGE_TNSCOPE.out.versions) + + // Mix intervals and no_intervals channels together + // Remove unnecessary metadata and add variantcaller + vcf = Channel.empty() + .mix(MERGE_TNSCOPE.out.vcf, vcf_branch.no_intervals) + .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'sentieon_tnscope' ], vcf ] } + + index = Channel.empty() + .mix(MERGE_TNSCOPE.out.tbi, tbi_branch.no_intervals) + .map{ meta, tbi -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'sentieon_tnscope' ], tbi ] } + + emit: + vcf // channel: [ meta, vcf ] + index // channel: [ meta, index ] + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf new file mode 100644 index 0000000000..61156784b6 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf @@ -0,0 +1,262 @@ +// +// TUMOR ONLY VARIANT CALLING +// Should be only run on patients without normal sample +// + +include { BAM_VARIANT_CALLING_CNVKIT } from '../bam_variant_calling_cnvkit' +include { BAM_VARIANT_CALLING_FREEBAYES } from '../bam_variant_calling_freebayes' +include { BAM_VARIANT_CALLING_MPILEUP } from '../bam_variant_calling_mpileup' +include { BAM_VARIANT_CALLING_SINGLE_TIDDIT } from '../bam_variant_calling_single_tiddit' +include { BAM_VARIANT_CALLING_TUMOR_ONLY_CONTROLFREEC } from '../bam_variant_calling_tumor_only_controlfreec' +include { BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA } from
'../bam_variant_calling_tumor_only_manta' +include { BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 } from '../bam_variant_calling_tumor_only_mutect2' +include { BAM_VARIANT_CALLING_TUMOR_ONLY_LOFREQ } from '../bam_variant_calling_tumor_only_lofreq' +include { BAM_VARIANT_CALLING_TUMOR_ONLY_TNSCOPE } from '../bam_variant_calling_tumor_only_tnscope' +include { MSISENSOR2_MSI } from '../../../modules/nf-core/msisensor2/msi' + +workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { + take: + tools // Mandatory, list of tools to apply + bam // channel: [mandatory] bam + cram // channel: [mandatory] cram + bwa // channel: [optional] bwa + cf_chrom_len // channel: [optional] controlfreec length file + chr_files // channel: passed to the CONTROLFREEC subworkflow + cnvkit_reference // channel: passed to the CNVKIT subworkflow + dbsnp // channel: [mandatory] dbsnp + dbsnp_tbi // channel: [mandatory] dbsnp_tbi + dict // channel: [mandatory] dict + fasta // channel: [mandatory] fasta + fasta_fai // channel: [mandatory] fasta_fai + germline_resource // channel: [optional] germline_resource + germline_resource_tbi // channel: [optional] germline_resource_tbi + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + intervals_bed_gz_tbi // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi, num_intervals ] or [ [], [], 0 ] if no intervals + intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped + intervals_bed_gz_tbi_combined // channel: [mandatory] intervals/target regions in one file zipped + mappability // channel: passed to the CONTROLFREEC subworkflow + msisensor2_models // channel: passed to MSISENSOR2_MSI + panel_of_normals // channel: [optional] panel_of_normals + panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi + joint_mutect2 // boolean: [mandatory] [default: false] run mutect2 in joint mode + wes // boolean: [mandatory] [default: false] whether targeted data is processed + + main: + // Channels are often remapped to match module/subworkflow + + // Gather all versions + versions = Channel.empty() + + //TODO: Temporary until the if's can be removed and printing to
terminal is prevented with "when" in the modules.config + out_msisensor2 = Channel.empty() + vcf_freebayes = Channel.empty() + vcf_lofreq = Channel.empty() + vcf_manta = Channel.empty() + vcf_mpileup = Channel.empty() + vcf_mutect2 = Channel.empty() + vcf_tiddit = Channel.empty() + vcf_tnscope = Channel.empty() + + // Initialize empty TBI channels + tbi_freebayes = Channel.empty() + tbi_lofreq = Channel.empty() + tbi_manta = Channel.empty() + tbi_mpileup = Channel.empty() + tbi_mutect2 = Channel.empty() + tbi_tiddit = Channel.empty() + tbi_tnscope = Channel.empty() + + // MPILEUP (also run for controlfreec, which consumes its mpileup output); the tool checks are grouped so a null `tools` short-circuits + if (tools && (tools.split(',').contains('mpileup') || tools.split(',').contains('controlfreec'))) { + BAM_VARIANT_CALLING_MPILEUP( + cram, + dict, + fasta, + intervals, + ) + vcf_mpileup = BAM_VARIANT_CALLING_MPILEUP.out.vcf + tbi_mpileup = BAM_VARIANT_CALLING_MPILEUP.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_MPILEUP.out.versions) + } + + // CONTROLFREEC (depends on MPILEUP) + if (tools && tools.split(',').contains('controlfreec')) { + BAM_VARIANT_CALLING_TUMOR_ONLY_CONTROLFREEC( + BAM_VARIANT_CALLING_MPILEUP.out.mpileup.map { meta, pileup_tumor -> [meta, [], pileup_tumor, [], [], [], []] }, + fasta.map { _meta, fasta_ -> [fasta_] }, + cf_chrom_len ?: fasta_fai.map { _meta, fasta_fai_ -> [fasta_fai_] }, + dbsnp, + dbsnp_tbi, + chr_files, + mappability, + wes ?
intervals_bed_combined : [], + ) + + versions = versions.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_CONTROLFREEC.out.versions) + } + + // CNVKIT + if (tools && tools.split(',').contains('cnvkit')) { + BAM_VARIANT_CALLING_CNVKIT( + bam.map { meta_, bam_, _bai -> [meta_, bam_, []] }, + fasta, + fasta_fai, + [[id: "null"], []], + cnvkit_reference.map { it -> [[id: it[0].baseName], it] }, + ) + + versions = versions.mix(BAM_VARIANT_CALLING_CNVKIT.out.versions) + } + + // FREEBAYES + if (tools && tools.split(',').contains('freebayes')) { + BAM_VARIANT_CALLING_FREEBAYES( + cram.map { meta_, cram_, crai_ -> [meta_, cram_, crai_, [], []] }, + dict, + fasta, + fasta_fai, + intervals, + ) + + vcf_freebayes = BAM_VARIANT_CALLING_FREEBAYES.out.vcf + tbi_freebayes = BAM_VARIANT_CALLING_FREEBAYES.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_FREEBAYES.out.versions) + } + + // MSISENSOR2 + if (tools && tools.split(',').contains('msisensor2')) { + + MSISENSOR2_MSI(bam, msisensor2_models) + + versions = versions.mix(MSISENSOR2_MSI.out.versions) + out_msisensor2 = out_msisensor2.mix(MSISENSOR2_MSI.out.distribution) + out_msisensor2 = out_msisensor2.mix(MSISENSOR2_MSI.out.somatic) + } + + // MUTECT2 + if (tools && tools.split(',').contains('mutect2')) { + BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2( + cram.map { meta_, cram_, crai_ -> + joint_mutect2 + ?
[meta_ - meta_.subMap('data_type') + [id: meta_.patient], cram_, crai_] + : [meta_ - meta_.subMap('data_type'), cram_, crai_] + }, + fasta, + fasta_fai, + dict, + germline_resource, + germline_resource_tbi, + panel_of_normals, + panel_of_normals_tbi, + intervals, + joint_mutect2, + ) + + // vcf_mutect2 and tbi_mutect2 always contain usable output (filtered if available, otherwise unfiltered) + vcf_mutect2 = BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2.out.vcf + tbi_mutect2 = BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2.out.versions) + } + + //LOFREQ + if (tools && tools.split(',').contains('lofreq')) { + BAM_VARIANT_CALLING_TUMOR_ONLY_LOFREQ( + cram, + fasta, + fasta_fai, + intervals, + dict, + ) + vcf_lofreq = BAM_VARIANT_CALLING_TUMOR_ONLY_LOFREQ.out.vcf + tbi_lofreq = BAM_VARIANT_CALLING_TUMOR_ONLY_LOFREQ.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_LOFREQ.out.versions) + } + + // MANTA + if (tools && tools.split(',').contains('manta')) { + BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA( + cram, + fasta, + fasta_fai, + intervals_bed_gz_tbi_combined, + ) + + vcf_manta = BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA.out.vcf + tbi_manta = BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA.out.versions) + } + + // TIDDIT + if (tools && tools.split(',').contains('tiddit')) { + BAM_VARIANT_CALLING_SINGLE_TIDDIT( + cram, + fasta, + bwa, + ) + + vcf_tiddit = BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.vcf + tbi_tiddit = BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.versions) + } + + // TNSCOPE + if (tools && tools.split(',').contains('sentieon_tnscope')) { + BAM_VARIANT_CALLING_TUMOR_ONLY_TNSCOPE( + cram, + fasta, + fasta_fai, + dict, + germline_resource, + germline_resource_tbi, + panel_of_normals, + panel_of_normals_tbi, + intervals, + ) + + vcf_tnscope =
BAM_VARIANT_CALLING_TUMOR_ONLY_TNSCOPE.out.vcf + tbi_tnscope = BAM_VARIANT_CALLING_TUMOR_ONLY_TNSCOPE.out.tbi + versions = versions.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_TNSCOPE.out.versions) + } + + vcf_all = Channel.empty() + .mix( + vcf_freebayes, + vcf_lofreq, + vcf_manta, + vcf_mutect2, + vcf_mpileup, + vcf_tiddit, + vcf_tnscope, + ) + + tbi_all = Channel.empty() + .mix( + tbi_freebayes, + tbi_lofreq, + tbi_manta, + tbi_mutect2, + tbi_mpileup, + tbi_tiddit, + tbi_tnscope, + ) + + emit: + out_msisensor2 + vcf_all + tbi_all + vcf_freebayes + vcf_lofreq + vcf_manta + vcf_mpileup + vcf_mutect2 + vcf_tiddit + vcf_tnscope + tbi_freebayes + tbi_lofreq + tbi_manta + tbi_mpileup + tbi_mutect2 + tbi_tiddit + tbi_tnscope + versions +} diff --git a/subworkflows/local/bam_variant_calling_tumor_only_controlfreec/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_controlfreec/main.nf new file mode 100644 index 0000000000..e7178cbaa4 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_tumor_only_controlfreec/main.nf @@ -0,0 +1,43 @@ +// +// CONTROLFREEC tumor-only variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { CONTROLFREEC_FREEC as FREEC_TUMORONLY } from '../../../modules/nf-core/controlfreec/freec/main' +include { CONTROLFREEC_ASSESSSIGNIFICANCE as ASSESS_SIGNIFICANCE } from '../../../modules/nf-core/controlfreec/assesssignificance/main' +include { CONTROLFREEC_FREEC2BED as FREEC2BED } from '../../../modules/nf-core/controlfreec/freec2bed/main' +include { CONTROLFREEC_FREEC2CIRCOS as FREEC2CIRCOS } from '../../../modules/nf-core/controlfreec/freec2circos/main' +include { CONTROLFREEC_MAKEGRAPH2 as MAKEGRAPH2 } from '../../../modules/nf-core/controlfreec/makegraph2/main' + +workflow BAM_VARIANT_CALLING_TUMOR_ONLY_CONTROLFREEC { + take: + controlfreec_input // channel: [mandatory] [meta, [], pileup_tumor, [], [], [], []] + fasta // channel:
[mandatory] + fasta_fai // channel: [mandatory] + dbsnp // channel: [mandatory] + dbsnp_tbi // channel: [mandatory] + chr_files // channel: [mandatory] + mappability // channel: [mandatory] + intervals_bed // channel: [optional] Contains a bed file of all intervals combined provided with the cram input(s). Should be empty for WGS + + main: + + ch_versions = Channel.empty() + + FREEC_TUMORONLY(controlfreec_input, fasta, fasta_fai, [], dbsnp, dbsnp_tbi, chr_files, mappability, intervals_bed, []) + + ASSESS_SIGNIFICANCE(FREEC_TUMORONLY.out.CNV.join(FREEC_TUMORONLY.out.ratio, failOnDuplicate: true, failOnMismatch: true)) + FREEC2BED(FREEC_TUMORONLY.out.ratio) + FREEC2CIRCOS(FREEC_TUMORONLY.out.ratio) + MAKEGRAPH2(FREEC_TUMORONLY.out.ratio.join(FREEC_TUMORONLY.out.BAF, failOnDuplicate: true, failOnMismatch: true)) + + ch_versions = ch_versions.mix(FREEC_TUMORONLY.out.versions) + ch_versions = ch_versions.mix(ASSESS_SIGNIFICANCE.out.versions) + ch_versions = ch_versions.mix(FREEC2BED.out.versions) + ch_versions = ch_versions.mix(FREEC2CIRCOS.out.versions) + ch_versions = ch_versions.mix(MAKEGRAPH2.out.versions) + + emit: + versions = ch_versions +} diff --git a/subworkflows/local/bam_variant_calling_tumor_only_lofreq/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_lofreq/main.nf new file mode 100644 index 0000000000..0d87ae54b9 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_tumor_only_lofreq/main.nf @@ -0,0 +1,53 @@ +include { LOFREQ_CALLPARALLEL as LOFREQ } from '../../../modules/nf-core/lofreq/callparallel/main.nf' +include { GATK4_MERGEVCFS as MERGE_LOFREQ } from '../../../modules/nf-core/gatk4/mergevcfs/main.nf' + +workflow BAM_VARIANT_CALLING_TUMOR_ONLY_LOFREQ { + take: + input // channel: [mandatory] [ meta, tumor_cram, tumor_crai ] + fasta // channel: [mandatory] [ fasta ] + fai // channel: [mandatory] [ fasta_fai ] + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] + dict // channel: 
/path/to/reference/fasta/dictionary + + main: + versions = Channel.empty() + + // Combine cram and intervals for spread and gather strategy + input_intervals = input.combine(intervals) + // Move num_intervals to meta map + .map {meta, tumor_cram, tumor_crai, intervals_, num_intervals -> [meta + [ num_intervals:num_intervals ], tumor_cram, tumor_crai, intervals_]} + + LOFREQ(input_intervals, fasta, fai) // Call variants with LoFreq + + // Figuring out if there is one or more vcf(s) from the same sample + vcf_branch = LOFREQ.out.vcf.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more tbi(s) from the same sample + tbi_branch = LOFREQ.out.tbi.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only when using intervals + vcf_to_merge = vcf_branch.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ] }.groupTuple() + + MERGE_LOFREQ(vcf_to_merge, dict) + + // Mix intervals and no_intervals channels together + // Remove unnecessary metadata + vcf = Channel.empty().mix(MERGE_LOFREQ.out.vcf, vcf_branch.no_intervals).map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'lofreq' ], vcf ] } + tbi = Channel.empty().mix(MERGE_LOFREQ.out.tbi, tbi_branch.no_intervals).map{ meta, tbi -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'lofreq' ], tbi ] } + + versions = versions.mix(MERGE_LOFREQ.out.versions) + versions = versions.mix(LOFREQ.out.versions) + + emit: + vcf + tbi + versions +} diff --git a/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf new file mode 100644 index 0000000000..b451c3962d --- /dev/null +++ b/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf @@ -0,0 +1,47 @@ +// +// MANTA single 
sample variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { MANTA_TUMORONLY } from '../../../modules/nf-core/manta/tumoronly/main' + +// Seems to be the consensus on upstream modules implementation too +workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA { + take: + cram // channel: [mandatory] [ meta, cram, crai ] + fasta // channel: [mandatory] [ meta, fasta ] + fasta_fai // channel: [mandatory] [ meta, fasta_fai ] + intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi ] or [ [], [] ] if no intervals + + main: + versions = Channel.empty() + + // Combine cram and intervals, account for 0 intervals + cram_intervals = cram.combine(intervals).map{ it -> + def bed_gz = it.size() > 3 ? it[3] : [] + def bed_tbi = it.size() > 3 ? it[4] : [] + + [it[0], it[1], it[2], bed_gz, bed_tbi] + } + + MANTA_TUMORONLY(cram_intervals, fasta, fasta_fai, []) + + small_indels_vcf = MANTA_TUMORONLY.out.candidate_small_indels_vcf + candidate_sv_vcf = MANTA_TUMORONLY.out.candidate_sv_vcf + tumor_sv_vcf = MANTA_TUMORONLY.out.tumor_sv_vcf + tumor_sv_vcf_tbi = MANTA_TUMORONLY.out.tumor_sv_vcf_tbi + + // Only tumor sv should get annotated + // add variantcaller to meta map + vcf = tumor_sv_vcf.map{ meta, vcf -> [ meta + [ variantcaller:'manta' ], vcf ] } + tbi = tumor_sv_vcf_tbi.map{ meta, tbi -> [ meta + [ variantcaller:'manta' ], tbi ] } + + versions = versions.mix(MANTA_TUMORONLY.out.versions) + + emit: + vcf + tbi + + versions +} diff --git a/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf new file mode 100644 index 0000000000..1f69afb7cd --- /dev/null +++ b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf @@ -0,0 +1,190 @@ +// +// GATK MUTECT2 in tumor only mode: getpileupsummaries, calculatecontamination and filtermutectcalls +// +// For all modules here: +// A 
when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { GATK4_MERGEVCFS as MERGE_MUTECT2 } from '../../../modules/nf-core/gatk4/mergevcfs' +include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../modules/nf-core/gatk4/calculatecontamination' +include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../modules/nf-core/gatk4/filtermutectcalls' +include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES } from '../../../modules/nf-core/gatk4/getpileupsummaries' +include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES } from '../../../modules/nf-core/gatk4/gatherpileupsummaries' +include { GATK4_LEARNREADORIENTATIONMODEL as LEARNREADORIENTATIONMODEL } from '../../../modules/nf-core/gatk4/learnreadorientationmodel' +include { GATK4_MERGEMUTECTSTATS as MERGEMUTECTSTATS } from '../../../modules/nf-core/gatk4/mergemutectstats' +include { GATK4_MUTECT2 as MUTECT2 } from '../../../modules/nf-core/gatk4/mutect2' + +workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 { + take: + input // channel: [ meta, [ input ], [ input_index ] ] + fasta // channel: /path/to/reference/fasta + fai // channel: /path/to/reference/fasta/index + dict // channel: /path/to/reference/fasta/dictionary + germline_resource // channel: /path/to/germline/resource + germline_resource_tbi // channel: /path/to/germline/index + panel_of_normals // channel: /path/to/panel/of/normals + panel_of_normals_tbi // channel: /path/to/panel/of/normals/index + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + joint_mutect2 // boolean: [mandatory] [default: false] run mutect2 in joint mode + + main: + versions = Channel.empty() + + // If no germline resource is provided, then create an empty channel to avoid GetPileupsummaries from being run + // Handle Channel.value([]) input from prepare_genome by converting to proper empty channel + germline_resource_pileup = 
germline_resource.filter { it != [] } + germline_resource_pileup_tbi = germline_resource_tbi.filter { it != [] } + + // Combine input and intervals for spread and gather strategy + input_intervals = input + .combine(intervals) + .map { meta, input_, index, intervals_, num_intervals -> [meta + [num_intervals: num_intervals], input_, index, intervals_] } + + if (joint_mutect2) { + // Perform variant calling using mutect2 module in tumor single mode + // Group cram files by patient + input_joint = input + .map { meta, input_, index -> [meta - meta.subMap('sample') + [id: meta.patient], input_, index] } + .groupTuple() + + // Add intervals for scatter-gather scaling + input_joint_intervals = input_joint + .combine(intervals) + .map { meta, cram, crai, intervals_, num_intervals -> [meta + [num_intervals: num_intervals], cram, crai, intervals_] } + MUTECT2(input_joint_intervals, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi) + } + else { + // Perform variant calling using mutect2 module in tumor single mode + MUTECT2(input_intervals, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi) + } + + // Figuring out if there is one or more vcf(s) from the same sample + vcf_branch = MUTECT2.out.vcf.branch { + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more tbi(s) from the same sample + tbi_branch = MUTECT2.out.tbi.branch { + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more stats(s) from the same sample + stats_branch = MUTECT2.out.stats.branch { + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Figuring out if there is one or more f1r2(s) from the same sample + f1r2_branch = MUTECT2.out.f1r2.branch { + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only 
when using intervals + vcf_to_merge = vcf_branch.intervals.map { meta, vcf -> [groupKey(meta, meta.num_intervals), vcf] }.groupTuple() + stats_to_merge = stats_branch.intervals.map { meta, stats -> [groupKey(meta, meta.num_intervals), stats] }.groupTuple() + f1r2_to_merge = f1r2_branch.intervals.map { meta, f1r2 -> [groupKey(meta, meta.num_intervals), f1r2] }.groupTuple() + + MERGE_MUTECT2(vcf_to_merge, dict) + MERGEMUTECTSTATS(stats_to_merge) + + // Mix intervals and no_intervals channels together + // Remove unnecessary metadata + vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals).map { meta, vcf -> [meta - meta.subMap('num_intervals'), vcf] } + tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals).map { meta, tbi -> [meta - meta.subMap('num_intervals'), tbi] } + stats = Channel.empty().mix(MERGEMUTECTSTATS.out.stats, stats_branch.no_intervals).map { meta, stats -> [meta - meta.subMap('num_intervals'), stats] } + f1r2 = Channel.empty().mix(f1r2_to_merge, f1r2_branch.no_intervals).map { meta, f1r2 -> [meta - meta.subMap('num_intervals'), f1r2] } + + // Generate artifactpriors using learnreadorientationmodel on the f1r2 output of mutect2 + LEARNREADORIENTATIONMODEL(f1r2) + + pileup_input = input_intervals.map { meta, cram, crai, intervals_ -> [meta + [id: meta.sample], cram, crai, intervals_] }.unique() + + // Generate pileup summary table using getpileupsummaries + GETPILEUPSUMMARIES(pileup_input, fasta, fai, dict, germline_resource_pileup, germline_resource_pileup_tbi) + + // Figuring out if there is one or more table(s) from the same sample + pileup_table_branch = GETPILEUPSUMMARIES.out.table.branch { + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + // Only when using intervals + pileup_table_to_merge = pileup_table_branch.intervals.map { meta, table -> [groupKey(meta, meta.num_intervals), table] }.groupTuple() + + GATHERPILEUPSUMMARIES(pileup_table_to_merge, dict.map { _meta, dict_ 
-> [dict_] }) + + // Mix intervals and no_intervals channels together + pileup_table = Channel.empty().mix(GATHERPILEUPSUMMARIES.out.table, pileup_table_branch.no_intervals).map { meta, table -> [meta - meta.subMap('num_intervals') + [id: meta.sample], table] } + + // Contamination and segmentation tables created using calculatecontamination on the pileup summary table + CALCULATECONTAMINATION(pileup_table.map { meta, table -> [meta, table, []] }) + + // Initialize empty channel: Contamination calculation is run on pileup table, pileup is not run if germline resource is not provided + calculatecontamination_out_seg = Channel.empty() + calculatecontamination_out_cont = Channel.empty() + + if (joint_mutect2) { + // Group tables by samples + calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map { meta, seg -> [meta - meta.subMap('sample', 'num_intervals') + [id: meta.patient], seg] }.groupTuple() + calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination.map { meta, cont -> [meta - meta.subMap('sample', 'num_intervals') + [id: meta.patient], cont] }.groupTuple() + } + else { + // Regular single sample mode + calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map { meta, seg -> [meta - meta.subMap('num_intervals'), seg] } + calculatecontamination_out_cont = CALCULATECONTAMINATION.out.contamination.map { meta, cont -> [meta - meta.subMap('num_intervals'), cont] } + } + + // Mutect2 calls filtered by filtermutectcalls using the contamination and segmentation tables + vcf_to_filter = vcf + .join(tbi, failOnDuplicate: true, failOnMismatch: true) + .join(stats, failOnDuplicate: true, failOnMismatch: true) + .join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true) + .join(calculatecontamination_out_seg) + .join(calculatecontamination_out_cont) + .map { meta, vcf_, tbi_, stats_, artifactprior, seg, cont -> [meta, vcf_, tbi_, stats_, artifactprior, seg, cont, []] } + + 
FILTERMUTECTCALLS(vcf_to_filter, fasta, fai, dict) + + // Handle filtered vs unfiltered output + // vcf_mutect2 and tbi_mutect2 should always contain usable output: + // - If filtering happened (germline_resource provided): use filtered results + // - If filtering didn't happen: use unfiltered results with variantcaller metadata + // This ensures downstream processes always have mutect2 calls available for consensus calling + // Using concat() + unique() ensures filtered output takes precedence deterministically + // concat() preserves order (filtered first), unique() keeps first occurrence of each meta key + vcf_mutect2 = FILTERMUTECTCALLS.out.vcf + .map { meta, vcf_ -> [meta - meta.subMap('num_intervals') + [variantcaller: 'mutect2'], vcf_] } + .concat(vcf.map { meta, vcf_ -> [meta - meta.subMap('num_intervals') + [variantcaller: 'mutect2'], vcf_] }) + .unique { it[0] } + + tbi_mutect2 = FILTERMUTECTCALLS.out.tbi + .map { meta, tbi_ -> [meta - meta.subMap('num_intervals') + [variantcaller: 'mutect2'], tbi_] } + .concat(tbi.map { meta, tbi_ -> [meta - meta.subMap('num_intervals') + [variantcaller: 'mutect2'], tbi_] }) + .unique { it[0] } + + versions = versions.mix(MERGE_MUTECT2.out.versions) + versions = versions.mix(CALCULATECONTAMINATION.out.versions) + versions = versions.mix(FILTERMUTECTCALLS.out.versions) + versions = versions.mix(GETPILEUPSUMMARIES.out.versions) + versions = versions.mix(GATHERPILEUPSUMMARIES.out.versions) + versions = versions.mix(LEARNREADORIENTATIONMODEL.out.versions) + versions = versions.mix(MERGEMUTECTSTATS.out.versions) + versions = versions.mix(MUTECT2.out.versions) + + emit: + vcf = vcf_mutect2 // channel: [ meta, vcf ] - filtered if germline_resource provided, otherwise unfiltered + tbi = tbi_mutect2 // channel: [ meta, tbi ] - filtered if germline_resource provided, otherwise unfiltered + + stats_filtered = FILTERMUTECTCALLS.out.stats // channel: [ meta, stats ] + + artifact_priors = LEARNREADORIENTATIONMODEL.out.artifactprior // 
channel: [ meta, artifactprior ] + + pileup_table // channel: [ meta, table ] + + contamination_table = calculatecontamination_out_cont // channel: [ meta, contamination ] + segmentation_table = calculatecontamination_out_seg // channel: [ meta, segmentation ] + + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/gatk_tumor_only_somatic_variant_calling/meta.yml b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/meta.yml similarity index 98% rename from subworkflows/nf-core/gatk_tumor_only_somatic_variant_calling/meta.yml rename to subworkflows/local/bam_variant_calling_tumor_only_mutect2/meta.yml index 143296916b..4c41f1f261 100644 --- a/subworkflows/nf-core/gatk_tumor_only_somatic_variant_calling/meta.yml +++ b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/meta.yml @@ -67,7 +67,7 @@ output: - versions: type: file description: File containing software versions - pattern: 'versions.yml' + pattern: "versions.yml" - mutect2_vcf: type: file description: Compressed vcf file to be used for variant_calling. @@ -105,4 +105,4 @@ output: description: file containing statistics of the filtermutectcalls run. 
pattern: "*.filteringStats.tsv" authors: - - '@GCJMackenzie' + - "@GCJMackenzie" diff --git a/subworkflows/local/bam_variant_calling_tumor_only_tnscope/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_tnscope/main.nf new file mode 100644 index 0000000000..462ef7cba9 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_tumor_only_tnscope/main.nf @@ -0,0 +1,76 @@ +// +// +// SENTIEON TNSCOPE: tumor-only mode variantcalling +// + +include { SENTIEON_TNSCOPE } from '../../../modules/nf-core/sentieon/tnscope/main' +include { GATK4_MERGEVCFS as MERGE_TNSCOPE } from '../../../modules/nf-core/gatk4/mergevcfs/main' + +workflow BAM_VARIANT_CALLING_TUMOR_ONLY_TNSCOPE { + take: + input // channel: [ meta, [ input ], [ input_index ] ] + fasta // channel: /path/to/reference/fasta + fai // channel: /path/to/reference/fasta/index + dict // channel: /path/to/reference/fasta/dictionary + germline_resource // channel: /path/to/germline/resource + germline_resource_tbi // channel: /path/to/germline/index + panel_of_normals // channel: /path/to/panel/of/normals + panel_of_normals_tbi // channel: /path/to/panel/of/normals/index + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + + main: + versions = Channel.empty() + + // Combine input and intervals for spread and gather strategy + input_intervals = input.combine(intervals) + // Move num_intervals to meta map and reorganize channel for TNSCOPE module + .map{ meta, input_, index, intervals_, num_intervals -> [ meta + [ num_intervals:num_intervals ], input_, index, intervals_ ] } + + SENTIEON_TNSCOPE( + input_intervals, + fasta, + fai, + germline_resource.map{resource -> [[id: "resource"], resource]}, + germline_resource_tbi.map{index -> [[id: "resource"], index]}, + panel_of_normals.map{pon -> [[id: "pon"], pon]}, + panel_of_normals_tbi.map{index -> [[id: "pon"], index]}, + [[],[]], // cosmic + [[],[]] // cosmic_tbi + ) + versions = versions.mix(SENTIEON_TNSCOPE.out.versions) + 
+ // Figuring out if there is one or more vcf(s) from the same sample + vcf_branch = SENTIEON_TNSCOPE.out.vcf.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + // Figuring out if there is one or more tbi(s) from the same sample + tbi_branch = SENTIEON_TNSCOPE.out.index.branch{ + // Use meta.num_intervals to assess number of intervals + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + vcf_to_merge = vcf_branch.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ] }.groupTuple() + + // Merge if required + MERGE_TNSCOPE(vcf_to_merge, dict) + versions = versions.mix(MERGE_TNSCOPE.out.versions) + + // Mix intervals and no_intervals channels together + // Remove unnecessary metadata and add variantcaller + vcf = Channel.empty() + .mix(MERGE_TNSCOPE.out.vcf, vcf_branch.no_intervals) + .map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'sentieon_tnscope' ], vcf ] } + + index = Channel.empty() + .mix(MERGE_TNSCOPE.out.tbi, tbi_branch.no_intervals) + .map{ meta, tbi -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'sentieon_tnscope' ], tbi ] } + + emit: + vcf // channel: [ meta, vcf ] + tbi = index // channel: [ meta, tbi ] + index // channel: [ meta, index ] + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_variant_calling_tumor_only_tnscope/meta.yml b/subworkflows/local/bam_variant_calling_tumor_only_tnscope/meta.yml new file mode 100644 index 0000000000..4c41f1f261 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_tumor_only_tnscope/meta.yml @@ -0,0 +1,108 @@ +name: gatk_tumor_only_somatic_variant_calling +description: | + Perform variant calling on a single tumor sample using mutect2 tumor only mode. + Run the input bam file through getpileupsummaries and then calculatecontamination to get the contamination and segmentation tables. 
+ Filter the mutect2 output vcf using filtermutectcalls and the contamination & segmentation tables for additional filtering. +keywords: + - gatk4 + - mutect2 + - getpileupsummaries + - calculatecontamination + - filtermutectcalls + - variant_calling + - tumor_only + - filtered_vcf +modules: + - gatk4/mutect2 + - gatk4/getpileupsummaries + - gatk4/calculatecontamination + - gatk4/filtermutectcalls +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - input: + type: list + description: list containing one BAM file, also able to take CRAM as an input + pattern: "[ *.{bam/cram} ]" + - input_index: + type: list + description: list containing one BAM file index, also able to take CRAM index as an input + pattern: "[ *.{bam.bai/cram.crai} ]" + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - germline_resource: + type: file + description: Population vcf of germline sequencing, containing allele fractions. + pattern: "*.vcf.gz" + - germline_resource_tbi: + type: file + description: Index file for the germline resource. + pattern: "*.vcf.gz.tbi" + - panel_of_normals: + type: file + description: vcf file to be used as a panel of normals. + pattern: "*.vcf.gz" + - panel_of_normals_tbi: + type: file + description: Index for the panel of normals. + pattern: "*.vcf.gz.tbi" + - interval_file: + type: file + description: File containing intervals. + pattern: "*.interval_list" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - mutect2_vcf: + type: file + description: Compressed vcf file to be used for variant_calling. 
+ pattern: "[ *.vcf.gz ]" + - mutect2_tbi: + type: file + description: Indexes of the mutect2_vcf file + pattern: "[ *vcf.gz.tbi ]" + - mutect2_stats: + type: file + description: Stats files for the mutect2 vcf + pattern: "[ *vcf.gz.stats ]" + - pileup_table: + type: file + description: File containing the pileup summary table. + pattern: "*.pileups.table" + - contamination_table: + type: file + description: File containing the contamination table. + pattern: "*.contamination.table" + - segmentation_table: + type: file + description: Output table containing segmentation of tumor minor allele fractions. + pattern: "*.segmentation.table" + - filtered_vcf: + type: file + description: file containing filtered mutect2 calls. + pattern: "*.vcf.gz" + - filtered_tbi: + type: file + description: tbi file that pairs with filtered vcf. + pattern: "*.vcf.gz.tbi" + - filtered_stats: + type: file + description: file containing statistics of the filtermutectcalls run. + pattern: "*.filteringStats.tsv" +authors: + - "@GCJMackenzie" diff --git a/subworkflows/local/channel_align_create_csv/main.nf b/subworkflows/local/channel_align_create_csv/main.nf new file mode 100644 index 0000000000..96484b1d18 --- /dev/null +++ b/subworkflows/local/channel_align_create_csv/main.nf @@ -0,0 +1,26 @@ +// +// CHANNEL_ALIGN_CREATE_CSV +// + +workflow CHANNEL_ALIGN_CREATE_CSV { + take: + bam_indexed // channel: [mandatory] meta, bam, bai + outdir // + save_output_as_bam // + + main: + // Creating csv files to restart from this step + bam_indexed.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${outdir}/csv") { meta, bam, bai -> + def patient = meta.patient + def sample = meta.sample + def sex = meta.sex + def status = meta.status + def bam_file = "${outdir}/preprocessing/mapped/${sample}/${bam.name}" + def bai_file = "${outdir}/preprocessing/mapped/${sample}/${bai.name}" + + def type = save_output_as_bam ? "bam" : "cram" + def type_index = save_output_as_bam ? 
"bai" : "crai" + + ["mapped.csv", "patient,sex,status,sample,${type},${type_index}\n${patient},${sex},${status},${sample},${bam_file},${bai_file}\n"] + } +} diff --git a/subworkflows/local/channel_applybqsr_create_csv/main.nf b/subworkflows/local/channel_applybqsr_create_csv/main.nf new file mode 100644 index 0000000000..dd84d4d317 --- /dev/null +++ b/subworkflows/local/channel_applybqsr_create_csv/main.nf @@ -0,0 +1,26 @@ +// +// CHANNEL_APPLYBQSR_CREATE_CSV +// + +workflow CHANNEL_APPLYBQSR_CREATE_CSV { + take: + cram_recalibrated_index // channel: [mandatory] meta, cram, crai + outdir // + save_output_as_bam // + + main: + // Creating csv files to restart from this step + cram_recalibrated_index.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${outdir}/csv") { meta, file, index -> + def patient = meta.patient + def sample = meta.sample + def sex = meta.sex + def status = meta.status + def out_file = "${outdir}/preprocessing/recalibrated/${sample}/${file.name}" + def out_index = "${outdir}/preprocessing/recalibrated/${sample}/${index.name}" + + def type = save_output_as_bam ? "bam" : "cram" + def type_index = save_output_as_bam ? 
"bai" : "crai" + + ["recalibrated.csv", "patient,sex,status,sample,${type},${type_index}\n${patient},${sex},${status},${sample},${out_file},${out_index}\n"] + } +} diff --git a/subworkflows/local/channel_baserecalibrator_create_csv/main.nf b/subworkflows/local/channel_baserecalibrator_create_csv/main.nf new file mode 100644 index 0000000000..7801cb3857 --- /dev/null +++ b/subworkflows/local/channel_baserecalibrator_create_csv/main.nf @@ -0,0 +1,69 @@ +// +// CHANNEL_BASERECALIBRATOR_CREATE_CSV +// + +workflow CHANNEL_BASERECALIBRATOR_CREATE_CSV { + take: + cram_table_bqsr // channel: [mandatory] meta, cram, crai, table + tools // + skip_tools // + outdir // + save_output_as_bam // + + main: + // Creating csv files to restart from this step + if ( tools && tools.split(',').contains('sentieon_dedup') ) { + cram_table_bqsr.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${outdir}/csv") { meta, cram, crai, _table -> + + def patient = meta.patient + def sample = meta.sample + def sex = meta.sex + def status = meta.status + def suffix_aligned = save_output_as_bam ? "bam" : "cram" + def suffix_index = save_output_as_bam ? "bam.bai" : "cram.crai" + def cram_file = "${outdir}/preprocessing/sentieon_dedup/${sample}/${cram.baseName}.${suffix_aligned}" + def crai_file = "${outdir}/preprocessing/sentieon_dedup/${sample}/${crai.baseName.minus(".cram")}.${suffix_index}" + def table_file = "${outdir}/preprocessing/recal_table/${sample}/${sample}.recal.table" + + def type = save_output_as_bam ? "bam" : "cram" + def type_index = save_output_as_bam ? 
"bai" : "crai" + + ["markduplicates.csv", "patient,sex,status,sample,${type},${type_index},table\n${patient},${sex},${status},${sample},${cram_file},${crai_file},${table_file}\n"] + } + } else if (!(skip_tools && (skip_tools.split(',').contains('markduplicates')))) { + cram_table_bqsr.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${outdir}/csv") { meta, cram, crai, _table -> + + def patient = meta.patient + def sample = meta.sample + def sex = meta.sex + def status = meta.status + def suffix_aligned = save_output_as_bam ? "bam" : "cram" + def suffix_index = save_output_as_bam ? "bam.bai" : "cram.crai" + def cram_file = "${outdir}/preprocessing/markduplicates/${sample}/${cram.baseName}.${suffix_aligned}" + def crai_file = "${outdir}/preprocessing/markduplicates/${sample}/${crai.baseName.minus(".cram")}.${suffix_index}" + def table_file = "${outdir}/preprocessing/recal_table/${sample}/${sample}.recal.table" + + def type = save_output_as_bam ? "bam" : "cram" + def type_index = save_output_as_bam ? "bai" : "crai" + + ["markduplicates.csv", "patient,sex,status,sample,${type},${type_index},table\n${patient},${sex},${status},${sample},${cram_file},${crai_file},${table_file}\n"] + } + } else { + cram_table_bqsr.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${outdir}/csv") { meta, cram, crai, _table -> + def patient = meta.patient + def sample = meta.sample + def sex = meta.sex + def status = meta.status + def suffix_aligned = save_output_as_bam ? "bam" : "cram" + def suffix_index = save_output_as_bam ? "bam.bai" : "cram.crai" + def cram_file = "${outdir}/preprocessing/${sample}/mapped/${cram.baseName}.${suffix_aligned}" + def crai_file = "${outdir}/preprocessing/${sample}/mapped/${crai.baseName.minus(".cram")}.${suffix_index}" + def table_file = "${outdir}/preprocessing/${sample}/recal_table/${sample}.recal.table" + + def type = save_output_as_bam ? "bam" : "cram" + def type_index = save_output_as_bam ? 
"bai" : "crai" + + ["sorted.csv", "patient,sex,status,sample,${type},${type_index},table\n${patient},${sex},${status},${sample},${cram_file},${crai_file},${table_file}\n"] + } + } +} diff --git a/subworkflows/local/channel_markduplicates_create_csv/main.nf b/subworkflows/local/channel_markduplicates_create_csv/main.nf new file mode 100644 index 0000000000..60df4fb187 --- /dev/null +++ b/subworkflows/local/channel_markduplicates_create_csv/main.nf @@ -0,0 +1,29 @@ +// +// CHANNEL_MARKDUPLICATES_CREATE_CSV +// + +workflow CHANNEL_MARKDUPLICATES_CREATE_CSV { + take: + cram_markduplicates // channel: [mandatory] meta, cram, crai + csv_subfolder // + outdir // + save_output_as_bam // + + main: + // Creating csv files to restart from this step + cram_markduplicates.collectFile(keepHeader: true, skip: 1, sort: true, storeDir: "${outdir}/csv") { meta, file, index -> + def patient = meta.patient + def sample = meta.sample + def sex = meta.sex + def status = meta.status + def suffix_aligned = save_output_as_bam ? "bam" : "cram" + def suffix_index = save_output_as_bam ? "bam.bai" : "cram.crai" + def align_file = "${outdir}/preprocessing/${csv_subfolder}/${sample}/${file.baseName}.${suffix_aligned}" + def align_index = "${outdir}/preprocessing/${csv_subfolder}/${sample}/${index.baseName.minus(".cram")}.${suffix_index}" + + def type = save_output_as_bam ? "bam" : "cram" + def type_index = save_output_as_bam ? 
"bai" : "crai" + + ["markduplicates_no_table.csv", "patient,sex,status,sample,${type},${type_index}\n${patient},${sex},${status},${sample},${align_file},${align_index}\n"] + } +} diff --git a/subworkflows/local/channel_variant_calling_create_csv/main.nf b/subworkflows/local/channel_variant_calling_create_csv/main.nf new file mode 100644 index 0000000000..ea389f092b --- /dev/null +++ b/subworkflows/local/channel_variant_calling_create_csv/main.nf @@ -0,0 +1,19 @@ +// +// CHANNEL_VARIANT_CALLING_CREATE_CSV +// + +workflow CHANNEL_VARIANT_CALLING_CREATE_CSV { + take: + vcf_to_annotate // channel: [mandatory] meta, vcf + outdir // + + main: + // Creating csv files to restart from this step + vcf_to_annotate.collectFile(keepHeader: true, skip: 1,sort: true, storeDir: "${outdir}/csv"){ meta, vcf -> + def patient = meta.patient + def sample = meta.id + def variantcaller = meta.variantcaller + vcf = "${outdir}/variant_calling/${variantcaller}/${meta.id}/${vcf.getName()}" + ["variantcalled.csv", "patient,sample,variantcaller,vcf\n${patient},${sample},${variantcaller},${vcf}\n"] + } +} diff --git a/subworkflows/local/cram_merge_index_samtools/main.nf b/subworkflows/local/cram_merge_index_samtools/main.nf new file mode 100644 index 0000000000..def52fc2c5 --- /dev/null +++ b/subworkflows/local/cram_merge_index_samtools/main.nf @@ -0,0 +1,45 @@ +// +// MERGE INDEX CRAM +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { SAMTOOLS_INDEX as INDEX_CRAM } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_MERGE as MERGE_CRAM } from '../../../modules/nf-core/samtools/merge/main' + +workflow CRAM_MERGE_INDEX_SAMTOOLS { + take: + cram // channel: [mandatory] meta, cram + fasta // channel: [mandatory] meta, fasta + fasta_fai // channel: [mandatory] meta, fai + + main: + versions = channel.empty() + + // Figuring out if there is one or more cram(s) from the same sample + 
cram_to_merge = cram.branch { meta, cram_files -> + single: cram_files.size() <= 1 + return [meta, cram_files[0]] + multiple: cram_files.size() > 1 + } + + // Merge only when a sample has more than one cram + MERGE_CRAM(cram_to_merge.multiple, fasta, fasta_fai) + + // Mix merged and single (unmerged) crams back together + cram_all = MERGE_CRAM.out.cram.mix(cram_to_merge.single) + + // Index cram + INDEX_CRAM(cram_all) + + // Join with the crai file + cram_crai = cram_all.join(INDEX_CRAM.out.crai, failOnDuplicate: true, failOnMismatch: true) + + // Gather versions of all tools used + versions = versions.mix(INDEX_CRAM.out.versions) + versions = versions.mix(MERGE_CRAM.out.versions) + + emit: + cram_crai + versions +} diff --git a/subworkflows/local/cram_qc_mosdepth_samtools/main.nf b/subworkflows/local/cram_qc_mosdepth_samtools/main.nf new file mode 100644 index 0000000000..e53e69360e --- /dev/null +++ b/subworkflows/local/cram_qc_mosdepth_samtools/main.nf @@ -0,0 +1,37 @@ +// +// QC on CRAM +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { MOSDEPTH } from '../../../modules/nf-core/mosdepth/main' + +workflow CRAM_QC_MOSDEPTH_SAMTOOLS { + take: + cram // channel: [mandatory] [ meta, cram, crai ] + fasta // channel: [mandatory] [ fasta ] + intervals // channel: [ meta, bed ]; a missing bed is replaced by [] + + main: + versions = channel.empty() + reports = channel.empty() + + // Reports run on cram + SAMTOOLS_STATS(cram, fasta) + + MOSDEPTH(cram.combine(intervals.map { _meta, bed -> [bed ?: []] }), fasta) + + // Gather all reports generated + reports = reports.mix(SAMTOOLS_STATS.out.stats) + reports = reports.mix(MOSDEPTH.out.global_txt) + reports = reports.mix(MOSDEPTH.out.regions_txt) + + // Gather versions of all tools used + versions = versions.mix(MOSDEPTH.out.versions) + versions = versions.mix(SAMTOOLS_STATS.out.versions) + + emit: + reports + versions // channel: [ 
versions.yml ] +} diff --git a/subworkflows/local/cram_sampleqc/main.nf b/subworkflows/local/cram_sampleqc/main.nf new file mode 100644 index 0000000000..1e664186f7 --- /dev/null +++ b/subworkflows/local/cram_sampleqc/main.nf @@ -0,0 +1,43 @@ +include { BAM_NGSCHECKMATE } from '../../../subworkflows/nf-core/bam_ngscheckmate' +include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_RECAL } from '../../../subworkflows/local/cram_qc_mosdepth_samtools' + +workflow CRAM_SAMPLEQC { + take: + cram // channel: [ val(meta), cram, crai ] + ngscheckmate_bed // channel: [ ngscheckmate_bed ] + fasta // channel: [ fasta ] + skip_baserecalibration // boolean: true -> CRAM_QC_RECAL is not run + intervals_for_preprocessing // channel: forwarded to CRAM_QC_RECAL + + main: + + versions = channel.empty() + reports = channel.empty() + + if (!skip_baserecalibration) { + + CRAM_QC_RECAL( + cram, + fasta, + intervals_for_preprocessing, + ) + + // Gather QC reports + reports = CRAM_QC_RECAL.out.reports.collect { _meta, report -> report } + + // Gather used software versions + versions = versions.mix(CRAM_QC_RECAL.out.versions) + } + + BAM_NGSCHECKMATE(cram.map { meta, cram_, _crai -> [meta, cram_] }, ngscheckmate_bed.map { bed -> [[id: "ngscheckmate"], bed] }, fasta) + versions = versions.mix(BAM_NGSCHECKMATE.out.versions) + + emit: + corr_matrix = BAM_NGSCHECKMATE.out.corr_matrix // channel: [ meta, corr_matrix ] + matched = BAM_NGSCHECKMATE.out.matched // channel: [ meta, matched ] + all = BAM_NGSCHECKMATE.out.all // channel: [ meta, all ] + vcf = BAM_NGSCHECKMATE.out.vcf // channel: [ meta, vcf ] + pdf = BAM_NGSCHECKMATE.out.pdf // channel: [ meta, pdf ] + reports + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/download_cache_snpeff_vep/main.nf b/subworkflows/local/download_cache_snpeff_vep/main.nf new file mode 100644 index 0000000000..e2ff3eacfd --- /dev/null +++ b/subworkflows/local/download_cache_snpeff_vep/main.nf @@ -0,0 +1,26 @@ +// +// DOWNLOAD CACHE SNPEFF VEP +// + +// Initialize channels based on params or indices 
that were just built +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run +// Condition is based on params.step and params.tools +// If an extra condition exists, it's specified in comments + +include { ENSEMBLVEP_DOWNLOAD } from '../../../modules/nf-core/ensemblvep/download/main' +include { SNPEFF_DOWNLOAD } from '../../../modules/nf-core/snpeff/download/main' + +workflow DOWNLOAD_CACHE_SNPEFF_VEP { + take: + ensemblvep_info // forwarded as-is to ENSEMBLVEP_DOWNLOAD + snpeff_info // forwarded as-is to SNPEFF_DOWNLOAD + + main: + ENSEMBLVEP_DOWNLOAD(ensemblvep_info) + SNPEFF_DOWNLOAD(snpeff_info) + + emit: + ensemblvep_cache = ENSEMBLVEP_DOWNLOAD.out.cache.collect() // channel: [ meta, cache ] + snpeff_cache = SNPEFF_DOWNLOAD.out.cache.collect() // channel: [ meta, cache ] +} diff --git a/subworkflows/local/fastq_align/main.nf b/subworkflows/local/fastq_align/main.nf new file mode 100644 index 0000000000..92a2b3e4f9 --- /dev/null +++ b/subworkflows/local/fastq_align/main.nf @@ -0,0 +1,56 @@ +// +// MAPPING +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { BWAMEM2_MEM } from '../../../modules/nf-core/bwamem2/mem/main' +include { BWA_MEM as BWAMEM1_MEM } from '../../../modules/nf-core/bwa/mem/main' +include { DRAGMAP_ALIGN } from '../../../modules/nf-core/dragmap/align/main' +include { SENTIEON_BWAMEM } from '../../../modules/nf-core/sentieon/bwamem/main' + +workflow FASTQ_ALIGN { + take: + reads // channel: [mandatory] meta, reads + index // channel: [mandatory] index + sort // boolean: [mandatory] true -> sort, false -> don't sort + fasta // passed to SENTIEON_BWAMEM only + fasta_fai // passed to SENTIEON_BWAMEM only + + main: + + versions = channel.empty() + reports = channel.empty() + + // Only one of the following should be run + BWAMEM1_MEM(reads, index, [[id:'no_fasta'], []], sort) // If aligner is bwa-mem + BWAMEM2_MEM(reads, index, [[id:'no_fasta'], []], sort) // If aligner is bwa-mem2 + DRAGMAP_ALIGN(reads, index, 
[[id:'no_fasta'], []], sort) // If aligner is dragmap + // The sentieon-bwamem-module does sorting as part of the conversion from sam to bam. + SENTIEON_BWAMEM(reads, index, fasta, fasta_fai) // If aligner is sentieon-bwamem + + // Get the bam files from the aligner + // Only one aligner is run + bam = channel.empty() + bam = bam.mix(BWAMEM1_MEM.out.bam) + bam = bam.mix(BWAMEM2_MEM.out.bam) + bam = bam.mix(DRAGMAP_ALIGN.out.bam) + bam = bam.mix(SENTIEON_BWAMEM.out.bam_and_bai.map{ meta, bam_file, _bai -> [ meta, bam_file ] }) + + bai = SENTIEON_BWAMEM.out.bam_and_bai.map{ meta, _bam_file, bai -> [ meta, bai ] } + + // Gather reports of all tools used + reports = reports.mix(DRAGMAP_ALIGN.out.log) + + // Gather versions of all tools used + versions = versions.mix(BWAMEM1_MEM.out.versions) + versions = versions.mix(BWAMEM2_MEM.out.versions) + versions = versions.mix(DRAGMAP_ALIGN.out.versions) + versions = versions.mix(SENTIEON_BWAMEM.out.versions) + + emit: + bam // channel: [ [meta], bam ] + bai // channel: [ [meta], bai ] + reports + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf b/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf new file mode 100644 index 0000000000..68851c58b7 --- /dev/null +++ b/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf @@ -0,0 +1,87 @@ +// +// Runs FGBIO tools to remove UMI tags from FASTQ reads +// Convert them to unmapped BAM file, map them to the reference genome, +// use the mapped information to group UMIs and generate consensus reads +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { FGBIO_CALLMOLECULARCONSENSUSREADS as CALLUMICONSENSUS } from '../../../modules/nf-core/fgbio/callmolecularconsensusreads/main.nf' +include { FGBIO_FASTQTOBAM as FASTQTOBAM } from '../../../modules/nf-core/fgbio/fastqtobam/main' +include { FGBIO_GROUPREADSBYUMI as 
GROUPREADSBYUMI } from '../../../modules/nf-core/fgbio/groupreadsbyumi/main' +include { FASTQ_ALIGN as ALIGN_UMI } from '../fastq_align/main' +include { SAMTOOLS_MERGE as MERGE_CONSENSUS } from '../../../modules/nf-core/samtools/merge/main' +include { SAMTOOLS_BAM2FQ as BAM2FASTQ } from '../../../modules/nf-core/samtools/bam2fq/main.nf' + +workflow FASTQ_CREATE_UMI_CONSENSUS_FGBIO { + take: + reads // channel: [mandatory] [ val(meta), [ reads ] ] + fasta // channel: [mandatory] /path/to/reference/fasta + fai // channel: [optional] /path/to/reference/fasta_fai, needed for Sentieon + map_index // channel: [mandatory] Pre-computed mapping index + groupreadsbyumi_strategy // string: [mandatory] grouping strategy - default: "Adjacency" + + main: + ch_versions = channel.empty() + + // params.umi_read_structure is passed out as ext.args + // FASTQ reads are converted into a tagged unmapped BAM file (uBAM) + FASTQTOBAM(reads) + + // in order to map uBAM using BWA MEM, we need to convert uBAM to FASTQ + // but keep the appropriate UMI tags in the FASTQ comment field and produce + // an interleaved FASTQ file (hence, split = false) + // TODO check if DRAGMAP works well with BAM inputs + split = false + BAM2FASTQ(FASTQTOBAM.out.bam, split) + + // appropriately tagged interleaved FASTQ reads are mapped to the reference + // bams will not be sorted (hence, sort = false) + sort = false + ALIGN_UMI(BAM2FASTQ.out.reads, map_index, sort, fasta, fai) + + bams_to_merge = ALIGN_UMI.out.bam + // id currently includes the lane, so swap to just id=sample and groupKey to avoid blocking + // Remove lane-specific fields (id, sample_lane_id) so groupTuple can match lanes from the same sample + .map { meta, bam -> + def clean_meta = meta - meta.subMap('id', 'sample_lane_id') + [id: meta.sample] + tuple( groupKey(clean_meta, meta.num_lanes), bam) + } + .groupTuple() + // undo the groupKey, else the meta map is not a normal map. 
+ .map { meta, bam -> tuple(meta.target, bam) } + .branch { meta, bam -> + single: meta.num_lanes <= 1 + return [meta, bam[0]] + multiple: meta.num_lanes > 1 + } + + // Merge across runs/lanes for the same sample + MERGE_CONSENSUS(bams_to_merge.multiple, [[], []], [[], []]) + + bams_all = MERGE_CONSENSUS.out.bam.mix(bams_to_merge.single) + + // appropriately tagged reads are now grouped by UMI information + GROUPREADSBYUMI(bams_all, groupreadsbyumi_strategy) + + // Using newly created groups + // To call a consensus across reads in the same group + // And emit a consensus BAM file + // TODO: add params for call_min_reads and call_min_baseq + call_min_reads = 1 + call_min_baseq = 10 + CALLUMICONSENSUS(GROUPREADSBYUMI.out.bam, call_min_reads, call_min_baseq) + + ch_versions = ch_versions.mix(BAM2FASTQ.out.versions) + ch_versions = ch_versions.mix(ALIGN_UMI.out.versions) + ch_versions = ch_versions.mix(CALLUMICONSENSUS.out.versions) + ch_versions = ch_versions.mix(FASTQTOBAM.out.versions) + ch_versions = ch_versions.mix(GROUPREADSBYUMI.out.versions) + ch_versions = ch_versions.mix(MERGE_CONSENSUS.out.versions) + + emit: + umibam = FASTQTOBAM.out.bam // channel: [ val(meta), [ bam ] ] + groupbam = GROUPREADSBYUMI.out.bam // channel: [ val(meta), [ bam ] ] + consensusbam = CALLUMICONSENSUS.out.bam // channel: [ val(meta), [ bam ] ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/fastq_preprocess_gatk/main.nf b/subworkflows/local/fastq_preprocess_gatk/main.nf new file mode 100644 index 0000000000..cabdb53091 --- /dev/null +++ b/subworkflows/local/fastq_preprocess_gatk/main.nf @@ -0,0 +1,527 @@ +// Create samplesheets to restart from different steps +include { CHANNEL_ALIGN_CREATE_CSV } from '../../../subworkflows/local/channel_align_create_csv/main' +include { CHANNEL_MARKDUPLICATES_CREATE_CSV } from '../../../subworkflows/local/channel_markduplicates_create_csv/main' +include { CHANNEL_BASERECALIBRATOR_CREATE_CSV } from 
'../../../subworkflows/local/channel_baserecalibrator_create_csv/main' +include { CHANNEL_APPLYBQSR_CREATE_CSV } from '../../../subworkflows/local/channel_applybqsr_create_csv/main' + +// Convert BAM files to FASTQ files +include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_UMI } from '../../../subworkflows/local/bam_convert_samtools/main' + +// TRIM/SPLIT FASTQ Files +include { FASTP } from '../../../modules/nf-core/fastp/main' + +// remove genomic contaminants with bbsplit +include { BBMAP_BBSPLIT } from '../../../modules/nf-core/bbmap/bbsplit' +//TODO: WHAT ABOUT BBSPLIT RUNS WITH PARABRICKS? + +// Create umi consensus bams from fastq +include { FASTQ_CREATE_UMI_CONSENSUS_FGBIO } from '../../../subworkflows/local/fastq_create_umi_consensus_fgbio/main' + +// Map input reads to reference genome +include { FASTQ_ALIGN } from '../../../subworkflows/local/fastq_align/main' + +// Merge and index BAM files (optional) +include { BAM_MERGE_INDEX_SAMTOOLS } from '../../../subworkflows/local/bam_merge_index_samtools/main' + +// Convert BAM files +include { SAMTOOLS_CONVERT as BAM_TO_CRAM_MAPPING } from '../../../modules/nf-core/samtools/convert/main' + +// Convert CRAM files (optional) +include { SAMTOOLS_CONVERT as CRAM_TO_BAM } from '../../../modules/nf-core/samtools/convert/main' +include { SAMTOOLS_CONVERT as CRAM_TO_BAM_RECAL } from '../../../modules/nf-core/samtools/convert/main' + +// Copy UMIs from read name to RX tag +include { FGBIO_COPYUMIFROMREADNAME } from '../../../modules/nf-core/fgbio/copyumifromreadname/main' + +// Mark Duplicates (+QC) +include { BAM_MARKDUPLICATES } from '../../../subworkflows/local/bam_markduplicates/main' +include { BAM_MARKDUPLICATES_SPARK } from '../../../subworkflows/local/bam_markduplicates_spark/main' +include { BAM_SENTIEON_DEDUP } from '../../../subworkflows/local/bam_sentieon_dedup/main' + +// QC on CRAM +include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_NO_MD } from '../../../subworkflows/local/cram_qc_mosdepth_samtools/main' + +// 
Create recalibration tables +include { BAM_BASERECALIBRATOR } from '../../../subworkflows/local/bam_baserecalibrator/main' +include { BAM_BASERECALIBRATOR_SPARK } from '../../../subworkflows/local/bam_baserecalibrator_spark/main' + +// Create recalibrated cram files to use for variant calling (+QC) +include { BAM_APPLYBQSR } from '../../../subworkflows/local/bam_applybqsr/main' +include { BAM_APPLYBQSR_SPARK } from '../../../subworkflows/local/bam_applybqsr_spark/main' + +workflow FASTQ_PREPROCESS_GATK { + take: + input_fastq + input_sample + dict + fasta + fasta_fai + index_alignment + intervals_and_num_intervals + intervals_for_preprocessing + known_sites_indels + known_sites_indels_tbi + bbsplit_index + + main: + + // To gather all QC reports for MultiQC + reports = channel.empty() + versions = channel.empty() + + // PREPROCESSING + + if (params.step == 'mapping') { + + // STEP 0: QC & TRIM + // Trim only with `--trim_fastq` + // Additional options to be set up + + // UMI consensus calling + if (params.umi_read_structure) { + FASTQ_CREATE_UMI_CONSENSUS_FGBIO( + input_fastq, + fasta, + fasta_fai, + index_alignment, + params.group_by_umi_strategy) + + bam_converted_from_fastq = FASTQ_CREATE_UMI_CONSENSUS_FGBIO.out.consensusbam.map{ meta, bam -> [ meta, bam, [] ] } + + // Convert back to fastq for further preprocessing + // fasta are not needed when converting bam to fastq -> [ id:"fasta" ], [] + // No need for fasta.fai -> [] + interleave_input = false // Currently don't allow interleaved input + CONVERT_FASTQ_UMI( + bam_converted_from_fastq, + [ [ id:"fasta" ], [] ], // fasta + [ [ id:'null' ], [] ], // fasta_fai + interleave_input) + + reads_for_fastp = CONVERT_FASTQ_UMI.out.reads + + // Gather used softwares versions + versions = versions.mix(CONVERT_FASTQ_UMI.out.versions) + versions = versions.mix(FASTQ_CREATE_UMI_CONSENSUS_FGBIO.out.versions) + } else { + reads_for_fastp = input_fastq + } + + // Trimming and/or splitting + if (params.trim_fastq || 
params.split_fastq > 0 || params.umi_location) { + + save_trimmed_fail = false + save_merged = false + FASTP( + reads_for_fastp, + [], // we are not using any adapter fastas at the moment + false, // we don't use discard_trimmed_pass at the moment + save_trimmed_fail, + save_merged + ) + + reports = reports.mix(FASTP.out.json.collect{ _meta, json -> json }) + reports = reports.mix(FASTP.out.html.collect{ _meta, html -> html }) + + if (params.split_fastq) { + reads_for_bbsplit = FASTP.out.reads.map{ meta, reads -> + def read_files = reads.sort(false) { a,b -> a.getName().tokenize('.')[0] <=> b.getName().tokenize('.')[0] }.collate(2) + [ meta + [ n_fastq: read_files.size() ], read_files ] + }.transpose() + } else reads_for_bbsplit = FASTP.out.reads + + versions = versions.mix(FASTP.out.versions) + + } else { + reads_for_bbsplit = reads_for_fastp + } + + // + // MODULE: Remove genome contaminant reads + // + if (params.tools && params.tools.split(',').contains('bbsplit')) { + + reads_for_alignment = BBMAP_BBSPLIT ( + reads_for_bbsplit, + bbsplit_index, + [], + [ [], [] ], + false + ) + .primary_fastq + + reports = reports.mix(BBMAP_BBSPLIT.out.stats.collect{ _meta, stats -> stats }) + + } else { + reads_for_alignment = reads_for_bbsplit + } + + + // STEP 1: MAPPING READS TO REFERENCE GENOME + // First, we must calculate number of lanes for each sample (meta.n_fastq) + // This is needed to group reads from the same sample together using groupKey to avoid stalling the workflow + // when reads from different samples are mixed together + reads_for_alignment.map { meta, reads -> + [ meta.subMap('patient', 'sample', 'sex', 'status'), reads ] + } + .groupTuple() + .map { meta, reads -> + meta + [ n_fastq: reads.size() ] // We can drop the FASTQ files now that we know how many there are + } + .set { reads_grouping_key } + + reads_for_alignment = reads_for_alignment.map{ meta, reads -> + // Update meta.id to meta.sample no multiple lanes or splitted fastqs + if (meta.size * 
meta.num_lanes == 1) [ meta + [ id:meta.sample ], reads ] + else [ meta, reads ] + } + + // reads will be sorted + sort_bam = true + FASTQ_ALIGN(reads_for_alignment, index_alignment, sort_bam, fasta, fasta_fai) + + aligned_bam = channel.empty() + aligned_bai = channel.empty() + // If UMIs started in read header or were put there by fastp, copy to RX tag + if (params.umi_in_read_header || params.umi_location) { + FGBIO_COPYUMIFROMREADNAME(FASTQ_ALIGN.out.bam.map{meta, bam -> [meta, bam, []]}) + aligned_bam = FGBIO_COPYUMIFROMREADNAME.out.bam + aligned_bai = FGBIO_COPYUMIFROMREADNAME.out.bai + versions = versions.mix(FGBIO_COPYUMIFROMREADNAME.out.versions) + } else { + aligned_bam = FASTQ_ALIGN.out.bam + aligned_bai = FASTQ_ALIGN.out.bai + } + + // Grouping the bams from the same samples not to stall the workflow + // Use groupKey to make sure that the correct group can advance as soon as it is complete + // and not stall the workflow until all reads from all channels are mapped + bam_mapped = aligned_bam + .combine(reads_grouping_key) // Creates a tuple of [ meta, bam, reads_grouping_key ] + .filter { meta1, _bam, meta2 -> meta1.sample == meta2.sample } + // Add n_fastq and other variables to meta + .map { meta1, bam, meta2 -> + [ meta1 + meta2, bam ] + } + // Manipulate meta map to remove old fields and add new ones + .map { meta, bam -> + [ meta - meta.subMap('id', 'read_group', 'data_type', 'size', 'sample_lane_id', 'lane') + [ data_type: 'bam', id: meta.sample ], bam ] + } + // Create groupKey from meta map + .map { meta, bam -> + [ groupKey( meta, meta.n_fastq), bam ] + } + // Group + .groupTuple() + + bai_mapped = aligned_bai + .combine(reads_grouping_key) // Creates a tuple of [ meta, bai, reads_grouping_key ] + .filter { meta1, _bai, meta2 -> meta1.sample == meta2.sample } + // Add n_fastq and other variables to meta + .map { meta1, bai, meta2 -> + [ meta1 + meta2, bai ] + } + // Manipulate meta map to remove old fields and add new ones + .map { meta, bai -> 
+ [ meta - meta.subMap('id', 'read_group', 'data_type', 'size', 'sample_lane_id', 'lane') + [ data_type: 'bai', id: meta.sample ], bai ] + } + // Create groupKey from meta map + .map { meta, bai -> + [ groupKey( meta, meta.n_fastq), bai ] + } + // Group + .groupTuple() + + + // gatk4 markduplicates can handle multiple bams as input, so no need to merge/index here + // Except if and only if save_mapped or (skipping markduplicates and not using sentieon_dedup) + if ( + params.save_mapped || + ( + (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) && + !(params.tools && params.tools.split(',').contains('sentieon_dedup')) + ) + ) { + // bams are merged (when multiple lanes from the same sample), indexed and then converted to cram + BAM_MERGE_INDEX_SAMTOOLS(bam_mapped) + + BAM_TO_CRAM_MAPPING(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai, fasta, fasta_fai) + // Create CSV to restart from this step + if (params.save_output_as_bam) CHANNEL_ALIGN_CREATE_CSV(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai, params.outdir, params.save_output_as_bam) + else CHANNEL_ALIGN_CREATE_CSV(BAM_TO_CRAM_MAPPING.out.cram.join(BAM_TO_CRAM_MAPPING.out.crai, failOnDuplicate: true, failOnMismatch: true), params.outdir, params.save_output_as_bam) + + // Gather used softwares versions + versions = versions.mix(BAM_MERGE_INDEX_SAMTOOLS.out.versions) + versions = versions.mix(BAM_TO_CRAM_MAPPING.out.versions) + } + + // Gather used softwares versions + versions = versions.mix(FASTQ_ALIGN.out.versions) + } + + if (params.step in ['mapping', 'markduplicates']) { + + // ch_cram_no_markduplicates_restart = channel.empty() + cram_markduplicates_no_spark = channel.empty() + cram_sentieon_dedup = channel.empty() + cram_markduplicates_spark = channel.empty() + + // STEP 2: markduplicates (+QC) + convert to CRAM + + // cram_for_markduplicates will contain bam mapped with FASTQ_ALIGN when step is mapping + // Or bams that are specified in the samplesheet.csv when step is markduplicates + 
cram_for_markduplicates = params.step == 'mapping' ? bam_mapped : input_sample.map{ meta, input, _index -> [ meta, input ] } + + if(params.step == 'markduplicates' && params.umi_in_read_header) { + FGBIO_COPYUMIFROMREADNAME(cram_for_markduplicates.map{ meta, bam -> [ meta, bam, [] ] }) + cram_for_markduplicates = FGBIO_COPYUMIFROMREADNAME.out.bam + versions = versions.mix(FGBIO_COPYUMIFROMREADNAME.out.versions) + } + + // if no MD is done, then run QC on mapped & converted CRAM files + // or the input BAM (+converted) or CRAM files + cram_skip_markduplicates = channel.empty() + + // Should it be possible to restart from converted crams? + // For now, conversion from bam to cram is only done when skipping markduplicates + + if ( + params.skip_tools && + params.skip_tools.split(',').contains('markduplicates') && + !(params.tools && params.tools.split(',').contains('sentieon_dedup')) + ) { + if (params.step == 'mapping') { + cram_skip_markduplicates = BAM_TO_CRAM_MAPPING.out.cram.join(BAM_TO_CRAM_MAPPING.out.crai, failOnDuplicate: true, failOnMismatch: true) + } else { + cram_skip_markduplicates = channel.empty().mix(input_sample) + } + + CRAM_QC_NO_MD(cram_skip_markduplicates, fasta, intervals_for_preprocessing) + + // Gather QC reports + reports = reports.mix(CRAM_QC_NO_MD.out.reports.collect{ _meta, report -> [ report ] }) + + // Gather used softwares versions + versions = versions.mix(CRAM_QC_NO_MD.out.versions) + } else if (params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates')) { + BAM_MARKDUPLICATES_SPARK( + cram_for_markduplicates, + dict, + fasta, + fasta_fai, + intervals_for_preprocessing) + cram_markduplicates_spark = BAM_MARKDUPLICATES_SPARK.out.cram + + // Gather QC reports + reports = reports.mix(BAM_MARKDUPLICATES_SPARK.out.reports.collect{ _meta, report -> [ report ] }) + + // Gather used softwares versions + versions = versions.mix(BAM_MARKDUPLICATES_SPARK.out.versions) + } else if (params.tools && 
params.tools.split(',').contains('sentieon_dedup')) { + crai_for_markduplicates = params.step == 'mapping' + ? bai_mapped + : ( params.umi_in_read_header ? FGBIO_COPYUMIFROMREADNAME.out.bai : input_sample.map{ meta, _input, index -> [ meta, index ] } ) + BAM_SENTIEON_DEDUP( + cram_for_markduplicates, + crai_for_markduplicates, + fasta, + fasta_fai, + intervals_for_preprocessing) + + cram_sentieon_dedup = BAM_SENTIEON_DEDUP.out.cram + + // Gather QC reports + reports = reports.mix(BAM_SENTIEON_DEDUP.out.reports.collect{ _meta, report -> [ report ] }) + + // Gather used softwares versions + versions = versions.mix(BAM_SENTIEON_DEDUP.out.versions) + } else { + + BAM_MARKDUPLICATES( + cram_for_markduplicates, + fasta, + fasta_fai, + intervals_for_preprocessing) + + cram_markduplicates_no_spark = BAM_MARKDUPLICATES.out.cram + + // Gather QC reports + reports = reports.mix(BAM_MARKDUPLICATES.out.reports.collect{ _meta, report -> [ report ] }) + + // Gather used softwares versions + versions = versions.mix(BAM_MARKDUPLICATES.out.versions) + } + + // ch_md_cram_for_restart contains either: + // - crams from markduplicates + // - crams from sentieon_dedup + // - crams from markduplicates_spark + // - crams from input step markduplicates --> from the converted ones only? + ch_md_cram_for_restart = channel.empty().mix(cram_markduplicates_no_spark, cram_markduplicates_spark, cram_sentieon_dedup) + // Make sure correct data types are carried through + .map{ meta, cram, crai -> [ meta + [data_type: "cram"], cram, crai ] } + + // If params.save_output_as_bam, then convert CRAM files to BAM + CRAM_TO_BAM(ch_md_cram_for_restart, fasta, fasta_fai) + versions = versions.mix(CRAM_TO_BAM.out.versions) + + // CSV should be written for the file actually out, either CRAM or BAM + // Create CSV to restart from this step + csv_subfolder = (params.tools && params.tools.split(',').contains('sentieon_dedup')) ? 
'sentieon_dedup' : 'markduplicates' + + if (params.save_output_as_bam) CHANNEL_MARKDUPLICATES_CREATE_CSV(CRAM_TO_BAM.out.bam.join(CRAM_TO_BAM.out.bai, failOnDuplicate: true, failOnMismatch: true), csv_subfolder, params.outdir, params.save_output_as_bam) + else CHANNEL_MARKDUPLICATES_CREATE_CSV(ch_md_cram_for_restart, csv_subfolder, params.outdir, params.save_output_as_bam) + } + + if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration']) { + + // Run if starting from step "prepare_recalibration" + if (params.step == 'prepare_recalibration') { + + ch_cram_for_bam_baserecalibrator = channel.empty().mix(input_sample) + + // Set the input samples for restart so we generate a samplesheet that contains the input files together with the recalibration table + ch_md_cram_for_restart = ch_cram_for_bam_baserecalibrator + + } else { + + // ch_cram_for_bam_baserecalibrator contains either: + // - crams from markduplicates + // - crams from markduplicates_spark + // - crams converted from bam mapped when skipping markduplicates + // - input cram files, when start from step markduplicates + ch_cram_for_bam_baserecalibrator = channel.empty().mix(ch_md_cram_for_restart, cram_skip_markduplicates ) + // Make sure correct data types are carried through + .map{ meta, cram, crai -> [ meta + [data_type: "cram"], cram, crai ] } + + } + + // STEP 3: Create recalibration tables + if (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'))) { + + ch_table_bqsr_no_spark = channel.empty() + ch_table_bqsr_spark = channel.empty() + + if (params.use_gatk_spark && params.use_gatk_spark.contains('baserecalibrator')) { + BAM_BASERECALIBRATOR_SPARK( + ch_cram_for_bam_baserecalibrator, + dict, + fasta, + fasta_fai, + intervals_and_num_intervals, + known_sites_indels, + known_sites_indels_tbi) + + ch_table_bqsr_spark = BAM_BASERECALIBRATOR_SPARK.out.table_bqsr + + // Gather used softwares versions + versions = 
versions.mix(BAM_BASERECALIBRATOR_SPARK.out.versions) + } else { + + BAM_BASERECALIBRATOR( + ch_cram_for_bam_baserecalibrator, + dict, + fasta, + fasta_fai, + intervals_and_num_intervals, + known_sites_indels, + known_sites_indels_tbi) + + ch_table_bqsr_no_spark = BAM_BASERECALIBRATOR.out.table_bqsr + + // Gather used softwares versions + versions = versions.mix(BAM_BASERECALIBRATOR.out.versions) + } + + // ch_table_bqsr contains either: + // - bqsr table from baserecalibrator + // - bqsr table from baserecalibrator_spark + ch_table_bqsr = channel.empty().mix( + ch_table_bqsr_no_spark, + ch_table_bqsr_spark) + + reports = reports.mix(ch_table_bqsr.collect{ _meta, table -> [ table ] }) + + cram_applybqsr = ch_cram_for_bam_baserecalibrator.join(ch_table_bqsr, failOnDuplicate: true, failOnMismatch: true) + + // Create CSV to restart from this step + CHANNEL_BASERECALIBRATOR_CREATE_CSV(ch_md_cram_for_restart.join(ch_table_bqsr, failOnDuplicate: true), params.tools, params.skip_tools, params.outdir, params.save_output_as_bam) + } + } + + // STEP 4: RECALIBRATING + if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate']) { + + // Run if starting from step "recalibrate" + if (params.step == 'recalibrate') { + + cram_applybqsr = channel.empty().mix(input_sample) + + } + + if (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator'))) { + cram_variant_calling_no_spark = channel.empty() + cram_variant_calling_spark = channel.empty() + + if (params.use_gatk_spark && params.use_gatk_spark.contains('baserecalibrator')) { + + BAM_APPLYBQSR_SPARK( + cram_applybqsr, + dict, + fasta, + fasta_fai, + intervals_and_num_intervals) + + cram_variant_calling_spark = BAM_APPLYBQSR_SPARK.out.cram + + // Gather used softwares versions + versions = versions.mix(BAM_APPLYBQSR_SPARK.out.versions) + + } else { + + BAM_APPLYBQSR( + cram_applybqsr, + dict, + fasta, + fasta_fai, + intervals_and_num_intervals) + + 
cram_variant_calling_no_spark = BAM_APPLYBQSR.out.cram + + // Gather used softwares versions + versions = versions.mix(BAM_APPLYBQSR.out.versions) + } + + cram_variant_calling = channel.empty().mix( + cram_variant_calling_no_spark, + cram_variant_calling_spark) + + // If params.save_output_as_bam, then convert CRAM files to BAM + CRAM_TO_BAM_RECAL(cram_variant_calling, fasta, fasta_fai) + versions = versions.mix(CRAM_TO_BAM_RECAL.out.versions) + + // CSV should be written for the file actually output, either CRAM or BAM + csv_recalibration = channel.empty() + csv_recalibration = params.save_output_as_bam ? CRAM_TO_BAM_RECAL.out.bam.join(CRAM_TO_BAM_RECAL.out.bai, failOnDuplicate: true, failOnMismatch: true) : cram_variant_calling + + // Create CSV to restart from this step + CHANNEL_APPLYBQSR_CREATE_CSV(csv_recalibration, params.outdir, params.save_output_as_bam) + + } else if (params.step == 'recalibrate') { + // cram_variant_calling contains either: + // - input bams converted to crams, if started from step recal + skip BQSR + // - input crams if started from step recal + skip BQSR + cram_variant_calling = channel.empty().mix(input_sample.map{ meta, cram, crai, _table -> [ meta, cram, crai ] }) + } else { + // cram_variant_calling contains either: + // - crams from markduplicates = ch_cram_for_bam_baserecalibrator if skip BQSR but not started from step recalibration + cram_variant_calling = channel.empty().mix(ch_cram_for_bam_baserecalibrator) + } + } + + emit: + cram_variant_calling + reports + versions + +} diff --git a/subworkflows/local/fastq_preprocess_parabricks/main.nf b/subworkflows/local/fastq_preprocess_parabricks/main.nf new file mode 100644 index 0000000000..805bc5af59 --- /dev/null +++ b/subworkflows/local/fastq_preprocess_parabricks/main.nf @@ -0,0 +1,101 @@ +include { PARABRICKS_FQ2BAM } from '../../../modules/nf-core/parabricks/fq2bam/main.nf' +include { CHANNEL_ALIGN_CREATE_CSV } from '../../../subworkflows/local/channel_align_create_csv/main' 
+include { SAMTOOLS_CONVERT as CRAM_TO_BAM } from '../../../modules/nf-core/samtools/convert/main' +include { CRAM_MERGE_INDEX_SAMTOOLS } from '../../../subworkflows/local/cram_merge_index_samtools/main' + +workflow FASTQ_PREPROCESS_PARABRICKS { + + take: + ch_reads // channel: [mandatory] meta, reads + ch_fasta // channel: [mandatory] meta, fasta + ch_fasta_fai // channel: [mandatory] meta, fasta_fai + ch_index // channel: [mandatory] meta, index - bwa index + ch_interval_file // channel: [optional] intervals_bed_combined + ch_known_sites // channel: [optional] known_sites_indels + val_output_fmt // either bam or cram + val_save_mapped // boolean + val_save_output_as_bam // boolean + val_outdir // output directory for saving mapped files + + main: + ch_versions = channel.empty() + ch_reports = channel.empty() + + ch_reads.map { meta, reads -> + [ meta.subMap('patient', 'sample', 'sex', 'status'), reads ] + } + .groupTuple() + .map { meta, reads -> + meta + [ n_fastq: reads.size() ] // We can drop the FASTQ files now that we know how many there are + }.set { reads_grouping_key } + + ch_reads = ch_reads.map{ meta, reads -> + // Update meta.id to meta.sample if there are no multiple lanes or split fastqs + if (meta.size * meta.num_lanes == 1) [ meta + [ id:meta.sample ], reads ] + else [ meta, reads ] + } + + // Adjust ch_interval_file + ch_interval_file = ch_interval_file.collect().map { files -> + [['id': 'intervals'], files] + } + + // Adjust ch_known_sites + ch_known_sites= ch_known_sites.collect().map { files -> + [['id': 'known_sites'], files] + } + + PARABRICKS_FQ2BAM( + ch_reads, // channel: [ val(meta), reads ] + ch_fasta, // channel: [ val(meta), fasta ] + ch_index, // channel: [ val(meta), index ] + ch_interval_file, // channel: [ val(meta), interval_file ] + ch_known_sites, // channel: [ val(meta), known_sites ] + val_output_fmt // either bam or cram + ) + + // Grouping the bams from the same samples not to stall the workflow + // Use groupKey to make sure that the 
correct group can advance as soon as it is complete + // and not stall the workflow until all reads from all channels are mapped + cram_mapped = PARABRICKS_FQ2BAM.out.cram + .combine(reads_grouping_key) // Creates a tuple of [ meta, bam, reads_grouping_key ] + .filter { meta1, _cram, meta2 -> meta1.sample == meta2.sample } + // Add n_fastq and other variables to meta + .map { meta1, cram, meta2 -> + [ meta1 + meta2, cram ] + } + // Manipulate meta map to remove old fields and add new ones + .map { meta, cram -> + [ meta - meta.subMap('id', 'read_group', 'data_type', 'size', 'sample_lane_id', 'lane') + [ data_type: 'cram', id: meta.sample ], cram ] + } + // Create groupKey from meta map + .map { meta, cram -> + [ groupKey( meta, meta.n_fastq), cram ] + } + // Group + .groupTuple() + + // crams are merged (when multiple lanes from the same sample) and indexed + CRAM_MERGE_INDEX_SAMTOOLS(cram_mapped, ch_fasta, ch_fasta_fai) + + ch_versions = ch_versions.mix(CRAM_MERGE_INDEX_SAMTOOLS.out.versions) + + cram_variant_calling = CRAM_MERGE_INDEX_SAMTOOLS.out.cram_crai + .map { meta, cram, crai -> + [ meta - meta.subMap('id', 'read_group', 'data_type', 'size', 'sample_lane_id', 'lane') + [ data_type: 'cram', id: meta.sample ], cram, crai ] + } + + if (val_save_output_as_bam) { + // Convert CRAM files to BAM + CRAM_TO_BAM(cram_variant_calling, ch_fasta, ch_fasta_fai) + ch_versions = ch_versions.mix(CRAM_TO_BAM.out.versions) + CHANNEL_ALIGN_CREATE_CSV(CRAM_TO_BAM.out.bam.join(CRAM_TO_BAM.out.bai, failOnDuplicate: true, failOnMismatch: true), val_outdir, val_save_output_as_bam) + } else if (val_save_mapped) { + CHANNEL_ALIGN_CREATE_CSV(cram_variant_calling, val_outdir, val_save_output_as_bam) + } + + emit: + cram = cram_variant_calling // channel: [ val(meta), cram, crai ] + versions = ch_versions // channel: [ versions.yml ] + reports = ch_reports +} diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf deleted file mode 
100644 index 5274a6b3b4..0000000000 --- a/subworkflows/local/germline_variant_calling.nf +++ /dev/null @@ -1,381 +0,0 @@ -// -// GERMLINE VARIANT CALLING -// - -include { BGZIP as BGZIP_DEEPVARIANT_GVCF } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_DEEPVARIANT_VCF } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_FREEBAYES } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_HAPLOTYPECALLER } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_MANTA_SMALL_INDELS } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_MANTA_SV } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_MANTA_DIPLOID } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_STRELKA } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_STRELKA_GENOME } from '../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_VCF_DEEPVARIANT } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_GVCF_DEEPVARIANT } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_VCF_FREEBAYES } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_VCF_HAPLOTYPECALLER } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_VCF_MANTA_SMALL_INDELS } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_VCF_MANTA_SV } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_VCF_MANTA_DIPLOID } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_VCF_STRELKA } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_VCF_STRELKA_GENOME } from '../../modules/local/concat_vcf/main' -include { DEEPVARIANT } from '../../modules/nf-core/modules/deepvariant/main' -include { FREEBAYES } from '../../modules/nf-core/modules/freebayes/main' -include { GATK_JOINT_GERMLINE_VARIANT_CALLING } from '../../subworkflows/nf-core/joint_germline_variant_calling/main' -include { GATK4_HAPLOTYPECALLER as 
HAPLOTYPECALLER } from '../../modules/nf-core/modules/gatk4/haplotypecaller/main' -include { MANTA_GERMLINE } from '../../modules/nf-core/modules/manta/germline/main' -include { STRELKA_GERMLINE } from '../../modules/nf-core/modules/strelka/germline/main' -include { TIDDIT_SV } from '../../modules/nf-core/modules/tiddit/sv/main' -include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } from '../../modules/nf-core/modules/tabix/bgziptabix/main' -include { TABIX_TABIX as TABIX_DEEPVARIANT_VCF } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_DEEPVARIANT_GVCF } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_FREEBAYES } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_HAPLOTYPECALLER } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_MANTA } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_STRELKA } from '../../modules/nf-core/modules/tabix/tabix/main' - - -workflow GERMLINE_VARIANT_CALLING { - take: - tools // Mandatory, list of tools to apply - cram_recalibrated // channel: [mandatory] cram - dbsnp // channel: [mandatory] dbsnp - dbsnp_tbi // channel: [mandatory] dbsnp_tbi - dict // channel: [mandatory] dict - fasta // channel: [mandatory] fasta - fasta_fai // channel: [mandatory] fasta_fai - intervals // channel: [mandatory] intervals/target regions - intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed - intervals_bed_combine_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed in one file - intervals_bed_combine_gz // channel: [mandatory] intervals/target regions index zipped and indexed in one file - num_intervals // val: number of intervals that are used to parallelize exection, either based on capture kit or GATK recommended for WGS - no_intervals - joint_germline // val: true/false on whether to run 
joint_germline calling, only works in combination with haplotypecaller at the moment - - main: - - if(!tools) tools = "" - - ch_versions = Channel.empty() - - deepvariant_vcf = Channel.empty() - freebayes_vcf = Channel.empty() - haplotypecaller_gvcf = Channel.empty() - manta_vcf = Channel.empty() - strelka_vcf = Channel.empty() - - cram_recalibrated.combine(intervals).map{ meta, cram, crai, intervals -> - sample = meta.sample - new_intervals = intervals.baseName != "no_intervals" ? intervals : [] - id = new_intervals ? sample + "_" + new_intervals.baseName : sample - [[ id: id, sample: meta.sample, gender: meta.gender, status: meta.status, patient: meta.patient ], cram, crai, new_intervals] - }.set{cram_recalibrated_intervals} - - cram_recalibrated.combine(intervals_bed_gz_tbi) - .map{ meta, cram, crai, bed, tbi -> - sample = meta.sample - new_bed = bed.simpleName != "no_intervals" ? bed : [] - new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] - id = new_bed ? sample + "_" + new_bed.simpleName : sample - new_meta = [ id: id, sample: meta.sample, gender: meta.gender, status: meta.status, patient: meta.patient ] - [new_meta, cram, crai, new_bed, new_tbi] - }.set{cram_recalibrated_intervals_gz_tbi} - - //TODO: benchmark if it is better to provide multiple bed files & run on multiple machines + mergeing afterwards || one containing all intervals and run on one larger machine - // Deepvariant: https://github.com/google/deepvariant/issues/510 - - if (tools.contains('deepvariant')) { - DEEPVARIANT( - cram_recalibrated_intervals, - fasta, - fasta_fai) - ch_versions = ch_versions.mix(DEEPVARIANT.out.versions) - - if(no_intervals){ - TABIX_DEEPVARIANT_VCF(DEEPVARIANT.out.vcf) - TABIX_DEEPVARIANT_GVCF(DEEPVARIANT.out.gvcf) - - deepvariant_vcf_gz = DEEPVARIANT.out.vcf - deepvariant_gvcf_gz = DEEPVARIANT.out.gvcf - - ch_versions = ch_versions.mix(TABIX_DEEPVARIANT_VCF.out.versions) - ch_versions = ch_versions.mix(TABIX_DEEPVARIANT_GVCF.out.versions) - }else{ - 
BGZIP_DEEPVARIANT_VCF(DEEPVARIANT.out.vcf) - BGZIP_DEEPVARIANT_GVCF(DEEPVARIANT.out.gvcf) - - BGZIP_DEEPVARIANT_VCF.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{deepvariant_vcf_to_concat} - - BGZIP_DEEPVARIANT_GVCF.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{deepvariant_gvcf_to_concat} - - CONCAT_VCF_DEEPVARIANT(deepvariant_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - CONCAT_GVCF_DEEPVARIANT(deepvariant_gvcf_to_concat, fasta_fai, intervals_bed_combine_gz) - - deepvariant_vcf_gz = CONCAT_VCF_DEEPVARIANT.out.vcf - deepvariant_gvcf_gz = CONCAT_GVCF_DEEPVARIANT.out.vcf - - ch_versions = ch_versions.mix(BGZIP_DEEPVARIANT_VCF.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_DEEPVARIANT.out.versions) - } - - deepvariant_vcf = deepvariant_vcf.mix(deepvariant_vcf_gz, deepvariant_gvcf_gz) - } - - if (tools.contains('freebayes')){ - - cram_recalibrated.combine(intervals).map{ meta, cram, crai, intervals -> - sample = meta.sample - new_intervals = intervals.baseName != "no_intervals" ? intervals : [] - id = new_intervals ? 
sample + "_" + new_intervals.baseName : sample - new_meta = [ id: id, sample: meta.sample, gender: meta.gender, status: meta.status, patient: meta.patient ] - [new_meta, cram, crai, [], [], new_intervals] - }.set{cram_recalibrated_intervals_freebayes} - - FREEBAYES( - cram_recalibrated_intervals_freebayes, - fasta, - fasta_fai, - [], - [], - [] - ) - ch_versions = ch_versions.mix(FREEBAYES.out.versions) - - if(no_intervals){ - TABIX_FREEBAYES(FREEBAYES.out.vcf) - freebayes_vcf_gz = FREEBAYES.out.vcf - ch_versions = ch_versions.mix(TABIX_FREEBAYES.out.versions) - }else{ - BGZIP_FREEBAYES(FREEBAYES.out.vcf) - - BGZIP_FREEBAYES.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{freebayes_vcf_to_concat} - - CONCAT_VCF_FREEBAYES(freebayes_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - freebayes_vcf_gz = CONCAT_VCF_FREEBAYES.out.vcf - - ch_versions = ch_versions.mix(BGZIP_FREEBAYES.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_FREEBAYES.out.versions) - } - - freebayes_vcf = freebayes_vcf.mix(freebayes_vcf_gz) - } - - if (tools.contains('haplotypecaller')) { - - HAPLOTYPECALLER( - cram_recalibrated_intervals, - fasta, - fasta_fai, - dict, - dbsnp, - dbsnp_tbi - ) - - ch_versions = ch_versions.mix(HAPLOTYPECALLER.out.versions) - - if(no_intervals){ - TABIX_HAPLOTYPECALLER(HAPLOTYPECALLER.out.vcf) - haplotypecaller_gvcf_gz = HAPLOTYPECALLER.out.vcf - ch_versions = ch_versions.mix(TABIX_HAPLOTYPECALLER.out.versions) - }else{ - BGZIP_HAPLOTYPECALLER(HAPLOTYPECALLER.out.vcf) - - BGZIP_HAPLOTYPECALLER.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{haplotypecaller_gvcf_to_concat} - - CONCAT_VCF_HAPLOTYPECALLER(haplotypecaller_gvcf_to_concat, fasta_fai, intervals_bed_combine_gz) - haplotypecaller_gvcf_gz = CONCAT_VCF_HAPLOTYPECALLER.out.vcf - - ch_versions = 
ch_versions.mix(BGZIP_HAPLOTYPECALLER.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_HAPLOTYPECALLER.out.versions) - } - - - if(joint_germline){ - run_haplotypecaller = false - run_vqsr = true //parameter? - //some feedback from gavin - // GATK_JOINT_GERMLINE_VARIANT_CALLING( - // haplotypecaller_vcf_gz_tbi, - // run_haplotypecaller, - // run_vqsr, - // fasta, - // fasta_fai, - // dict, - // dbsnp, - // dbsnp_tbi, - // "joined", - // allelespecific? - // resources? - // annotation? - // "BOTH", - // true, - // truthsensitivity -> parameter or module? - // ) - // ch_versions = ch_versions.mix(GATK_JOINT_GERMLINE_VARIANT_CALLING.out.versions) - } - - haplotypecaller_gvcf = haplotypecaller_gvcf.mix(haplotypecaller_gvcf_gz) - - } - - if (tools.contains('manta')){ - //TODO: Research if splitting by intervals is ok, we pretend for now it is fine. Seems to be the consensus on upstream modules implementaiton too - - MANTA_GERMLINE( - cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai - ) - - ch_versions = ch_versions.mix(MANTA_GERMLINE.out.versions) - - if(no_intervals){ - manta_candidate_small_indels_vcf = MANTA_GERMLINE.out.candidate_small_indels_vcf - manta_candidate_sv_vcf = MANTA_GERMLINE.out.candidate_sv_vcf - manta_diploid_sv_vcf = MANTA_GERMLINE.out.diploid_sv_vcf - }else{ - - BGZIP_MANTA_SV(MANTA_GERMLINE.out.candidate_small_indels_vcf) - BGZIP_MANTA_SMALL_INDELS(MANTA_GERMLINE.out.candidate_sv_vcf) - BGZIP_MANTA_DIPLOID(MANTA_GERMLINE.out.diploid_sv_vcf) - - BGZIP_MANTA_SV.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_sv_vcf_to_concat} - - BGZIP_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_small_indels_vcf_to_concat} - - BGZIP_MANTA_DIPLOID.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = 
new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_diploid_vcf_to_concat} - - CONCAT_VCF_MANTA_SV(manta_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - CONCAT_VCF_MANTA_SMALL_INDELS(manta_small_indels_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - CONCAT_VCF_MANTA_DIPLOID(manta_diploid_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - - manta_candidate_small_indels_vcf = CONCAT_VCF_MANTA_SV.out.vcf - manta_candidate_sv_vcf = CONCAT_VCF_MANTA_SMALL_INDELS.out.vcf - manta_diploid_sv_vcf = CONCAT_VCF_MANTA_DIPLOID.out.vcf - - ch_versions = ch_versions.mix(BGZIP_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(BGZIP_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_MANTA_DIPLOID.out.versions) - - ch_versions = ch_versions.mix(CONCAT_VCF_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_MANTA_DIPLOID.out.versions) - - } - - manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf, manta_candidate_sv_vcf, manta_diploid_sv_vcf) - } - - if (tools.contains('strelka')) { - //TODO: Research if splitting by intervals is ok, no reply on issue, we pretend for now it is fine. 
Seems to be the consensus on upstream modules implementaiton too - - STRELKA_GERMLINE( - cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai - ) - - ch_versions = ch_versions.mix(STRELKA_GERMLINE.out.versions) - - if(no_intervals){ - strelka_vcf_gz = STRELKA_GERMLINE.out.vcf - strelka_genome_vcf_gz = STRELKA_GERMLINE.out.genome_vcf - - }else{ - BGZIP_STRELKA(STRELKA_GERMLINE.out.vcf) - BGZIP_STRELKA_GENOME(STRELKA_GERMLINE.out.genome_vcf) - - BGZIP_STRELKA.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{strelka_vcf_to_concat} - - BGZIP_STRELKA_GENOME.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{strelka_genome_vcf_to_concat} - - CONCAT_VCF_STRELKA(strelka_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - CONCAT_VCF_STRELKA_GENOME(strelka_genome_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - - strelka_vcf_gz = CONCAT_VCF_STRELKA.out.vcf - strelka_genome_vcf_gz = CONCAT_VCF_STRELKA_GENOME.out.vcf - - ch_versions = ch_versions.mix(BGZIP_STRELKA.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_STRELKA.out.versions) - } - - strelka_vcf = strelka_vcf.mix(strelka_vcf_gz,strelka_genome_vcf_gz ) - } - - if (tools.contains('tiddit')){ - //TODO: Update tiddit on bioconda, the current version does not support cram usage, needs newest version: - // https://github.com/SciLifeLab/TIDDIT/issues/82#issuecomment-1022103264 - // Issue opened, either this week or end of february - - // TIDDIT_SV( - // cram_recalibrated, - // fasta, - // fasta_fai - // ) - - // TABIX_BGZIP_TIDDIT_SV(TIDDIT_SV.out.vcf) - // tiddit_vcf_gz_tbi = TABIX_BGZIP_TIDDIT_SV.out.gz_tbi - // tiddit_ploidy = TIDDIT_SV.out.ploidy - // tiddit_signals = TIDDIT_SV.out.signals - //tiddit_wig = TIDDIT_SV.out.wig - //tiddit_gc_wig = TIDDIT_SV.out.gc_wig - - //ch_versions = 
ch_versions.mix(TABIX_BGZIP_TIDDIT_SV.out.versions) - //ch_versions = ch_versions.mix(TIDDIT_SV.out.versions) - } - - emit: - deepvariant_vcf - freebayes_vcf - haplotypecaller_gvcf - manta_vcf - strelka_vcf - - versions = ch_versions -} diff --git a/subworkflows/local/mapping_csv.nf b/subworkflows/local/mapping_csv.nf deleted file mode 100644 index 4bcfb7dbd2..0000000000 --- a/subworkflows/local/mapping_csv.nf +++ /dev/null @@ -1,40 +0,0 @@ -// -// MAPPING_CSV -// - -workflow MAPPING_CSV { - take: - bam_indexed // channel: [mandatory] meta, bam, bai - save_bam_mapped // boolean: [mandatory] save_bam_mapped - skip_markduplicates // boolean: [mandatory] skip_markduplicates - - main: - if (save_bam_mapped) { - csv_bam_mapped = bam_indexed.map { meta, bam, bai -> [meta] } - // Creating csv files to restart from this step - csv_bam_mapped.collectFile(storeDir: "${params.outdir}/preprocessing/csv") { meta -> - patient = meta.patient[0] - sample = meta.sample[0] - gender = meta.gender[0] - status = meta.status[0] - bam = "${params.outdir}/preprocessing/${sample}/mapped/${sample}.bam" - bai = "${params.outdir}/preprocessing/${sample}/mapped/${sample}.bam.bai" - ["mapped_${sample}.csv", "patient,gender,status,sample,bam,bai\n${patient},${gender},${status},${sample},${bam},${bai}\n"] - }.collectFile(name: "mapped.csv", keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/preprocessing/csv") - } - - if (skip_markduplicates) { - csv_bam_mapped = bam_indexed.map { meta, bam, bai -> [meta] } - // Creating csv files to restart from this step - csv_bam_mapped.collectFile(storeDir: "${params.outdir}/preprocessing/csv") { meta -> - patient = meta.patient[0] - sample = meta.sample[0] - gender = meta.gender[0] - status = meta.status[0] - bam = "${params.outdir}/preprocessing/${sample}/mapped/${sample}.bam" - bai = "${params.outdir}/preprocessing/${sample}/mapped/${sample}.bam.bai" - table = "${params.outdir}/preprocessing/${sample}/recal_table/${sample}.recal.table" - 
["mapped_no_markduplicates_${sample}.csv", "patient,gender,status,sample,bam,bai,table\n${patient},${gender},${status},${sample},${bam},${bai},${table}\n"] - }.collectFile(name: 'mapped_no_markduplicates.csv', keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/preprocessing/csv") - } -} diff --git a/subworkflows/local/markduplicates_csv.nf b/subworkflows/local/markduplicates_csv.nf deleted file mode 100644 index e8aa4214e0..0000000000 --- a/subworkflows/local/markduplicates_csv.nf +++ /dev/null @@ -1,21 +0,0 @@ -// -// MARKDUPLICATES_CSV -// - -workflow MARKDUPLICATES_CSV { - take: - cram_markduplicates // channel: [mandatory] meta, cram, crai - - main: - // Creating csv files to restart from this step - cram_markduplicates.collectFile(storeDir: "${params.outdir}/preprocessing/csv") { meta, cram, crai -> - patient = meta.patient - sample = meta.sample - gender = meta.gender - status = meta.status - cram = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.cram" - crai = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.cram.crai" - table = "${params.outdir}/preprocessing/${sample}/recal_table/${sample}.recal.table" - ["markduplicates_${sample}.csv", "patient,gender,status,sample,cram,crai,table\n${patient},${gender},${status},${sample},${cram},${crai},${table}\n"] - }.collectFile(name: 'markduplicates.csv', keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/preprocessing/csv") -} diff --git a/subworkflows/local/pair_copy_number_calling.nf b/subworkflows/local/pair_copy_number_calling.nf deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf deleted file mode 100644 index e980393f5f..0000000000 --- a/subworkflows/local/pair_variant_calling.nf +++ /dev/null @@ -1,261 +0,0 @@ -// -// PAIRED VARIANT CALLING -// -include { BGZIP as BGZIP_MANTA_SMALL_INDELS } from '../../modules/local/bgzip' -include { 
BGZIP as BGZIP_MANTA_SV } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_MANTA_DIPLOID } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_MANTA_SOMATIC } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_STRELKA_SNVS } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_STRELKA_INDELS } from '../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_VCF_MANTA_SMALL_INDELS } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_VCF_MANTA_SV } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_VCF_MANTA_DIPLOID } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_VCF_MANTA_SOMATIC } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_VCF_STRELKA_SNVS } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_VCF_STRELKA_INDELS } from '../../modules/local/concat_vcf/main' -include { GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/main' -include { MANTA_SOMATIC } from '../../modules/nf-core/modules/manta/somatic/main' -include { MSISENSORPRO_MSI_SOMATIC } from '../../modules/nf-core/modules/msisensorpro/msi_somatic/main' -include { STRELKA_SOMATIC } from '../../modules/nf-core/modules/strelka/somatic/main' - -workflow PAIR_VARIANT_CALLING { - take: - tools - cram_pair // channel: [mandatory] cram - dbsnp // channel: [mandatory] dbsnp - dbsnp_tbi // channel: [mandatory] dbsnp_tbi - dict // channel: [mandatory] dict - fasta // channel: [mandatory] fasta - fasta_fai // channel: [mandatory] fasta_fai - intervals // channel: [mandatory] intervals/target regions - intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed - intervals_bed_combined_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed - intervals_bed_combine_gz // channel: [mandatory] intervals/target regions index zipped and 
indexed in one file - num_intervals // val: number of intervals that are used to parallelize exection, either based on capture kit or GATK recommended for WGS - no_intervals - msisensorpro_scan // channel: [optional] msisensorpro_scan - germline_resource // channel: [optional] germline_resource - germline_resource_tbi // channel: [optional] germline_resource_tbi - panel_of_normals // channel: [optional] panel_of_normals - panel_of_normals_tbi // channel: [optional] panel_of_normals_tbi - - main: - - if(!tools) tools = "" - - ch_versions = Channel.empty() - manta_vcf = Channel.empty() - strelka_vcf = Channel.empty() - msisensorpro_output = Channel.empty() - mutect2_vcf = Channel.empty() - - - cram_pair.combine(intervals) - .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals -> - normal_id = meta.normal_id - tumor_id = meta.tumor_id - new_intervals = intervals.baseName != "no_intervals" ? intervals : [] - id = new_intervals ? tumor_id + "_vs_" + normal_id + "_" + new_intervals.baseName : tumor_id + "_vs_" + normal_id - new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ] - [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals] - }.set{cram_pair_intervals} - - cram_pair.combine(intervals_bed_gz_tbi) - .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed, tbi -> - normal_id = meta.normal_id - tumor_id = meta.tumor_id - - new_bed = bed.simpleName != "no_intervals" ? bed : [] - new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] - id = bed.simpleName != "no_intervals" ? 
tumor_id + "_vs_" + normal_id + "_" + bed.simpleName : tumor_id + "_vs_" + normal_id - new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient] - [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, new_bed, new_tbi] - }.set{cram_pair_intervals_gz_tbi} - - if (tools.contains('manta')) { - MANTA_SOMATIC( - cram_pair_intervals_gz_tbi, - fasta, - fasta_fai, - ) - - ch_versions = ch_versions.mix(MANTA_SOMATIC.out.versions) - - if(no_intervals){ - manta_candidate_small_indels_vcf = MANTA_SOMATIC.out.candidate_small_indels_vcf - manta_candidate_sv_vcf = MANTA_SOMATIC.out.candidate_sv_vcf - manta_diploid_sv_vcf = MANTA_SOMATIC.out.diploid_sv_vcf - manta_somatic_sv_vcf = MANTA_SOMATIC.out.somatic_sv_vcf - }else{ - - BGZIP_MANTA_SV(MANTA_SOMATIC.out.candidate_small_indels_vcf) - BGZIP_MANTA_SMALL_INDELS(MANTA_SOMATIC.out.candidate_sv_vcf) - BGZIP_MANTA_DIPLOID(MANTA_SOMATIC.out.diploid_sv_vcf) - BGZIP_MANTA_SOMATIC(MANTA_SOMATIC.out.somatic_sv_vcf) - - BGZIP_MANTA_SV.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_sv_vcf_to_concat} - - BGZIP_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_small_indels_vcf_to_concat} - - BGZIP_MANTA_DIPLOID.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_diploid_vcf_to_concat} - - BGZIP_MANTA_SOMATIC.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_somatic_sv_vcf_to_concat} - - CONCAT_VCF_MANTA_SV(manta_sv_vcf_to_concat, 
fasta_fai, intervals_bed_combine_gz) - CONCAT_VCF_MANTA_SMALL_INDELS(manta_small_indels_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - CONCAT_VCF_MANTA_DIPLOID(manta_diploid_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - CONCAT_VCF_MANTA_SOMATIC(manta_somatic_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - - manta_candidate_small_indels_vcf = CONCAT_VCF_MANTA_SV.out.vcf - manta_candidate_sv_vcf = CONCAT_VCF_MANTA_SMALL_INDELS.out.vcf - manta_diploid_sv_vcf = CONCAT_VCF_MANTA_DIPLOID.out.vcf - manta_somatic_sv_vcf = CONCAT_VCF_MANTA_SOMATIC.out.vcf - - ch_versions = ch_versions.mix(BGZIP_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(BGZIP_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(BGZIP_MANTA_SOMATIC.out.versions) - - ch_versions = ch_versions.mix(CONCAT_VCF_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_MANTA_DIPLOID.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_MANTA_SOMATIC.out.versions) - - } - - manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf,manta_candidate_sv_vcf,manta_diploid_sv_vcf,manta_somatic_sv_vcf) - - cram_pair_strelka = Channel.empty() - if (tools.contains('strelka') && tools.contains('manta')) { - - cram_pair.join(manta_somatic_sv_vcf).combine(intervals_bed_gz_tbi) - .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, bed, tbi -> - normal_id = meta.normal_id - tumor_id = meta.tumor_id - - new_bed = bed.simpleName != "no_intervals" ? bed : [] - new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] - id = bed.simpleName != "no_intervals" ? 
tumor_id + "_vs_" + normal_id + "_" + bed.simpleName : tumor_id + "_vs_" + normal_id - new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient] - [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, manta_vcf, manta_tbi, new_bed, new_tbi] - }.set{cram_pair_strelka} - } else if (tools.contains('strelka') && !tools.contains('manta')) - - cram_pair.combine(intervals_bed_gz_tbi) - .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed, tbi -> - normal_id = meta.normal_id - tumor_id = meta.tumor_id - - new_bed = bed.simpleName != "no_intervals" ? bed : [] - new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] - id = bed.simpleName != "no_intervals" ? tumor_id + "_vs_" + normal_id + "_" + bed.simpleName : tumor_id + "_vs_" + normal_id - new_meta = [ id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient] - - [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, [], [], new_bed, new_tbi] - }.set{cram_pair_strelka} - } - - if(tools.contains('strelka')){ - - STRELKA_SOMATIC( - cram_pair_strelka, - fasta, - fasta_fai - ) - - if(no_intervals){ - strelka_snvs_vcf_gz = STRELKA_SOMATIC.out.vcf_snvs - strelka_indels_vcf_gz = STRELKA_SOMATIC.out.vcf_indels - }else{ - BGZIP_STRELKA_SNVS(STRELKA_SOMATIC.out.vcf_snvs) - BGZIP_STRELKA_INDELS(STRELKA_SOMATIC.out.vcf_indels) - - BGZIP_STRELKA_SNVS.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{strelka_snvs_vcf_to_concat} - - BGZIP_STRELKA_INDELS.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{strelka_indels_vcf_to_concat} - - CONCAT_VCF_STRELKA_SNVS(strelka_snvs_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - 
CONCAT_VCF_STRELKA_INDELS(strelka_indels_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - - strelka_snvs_vcf_gz = CONCAT_VCF_STRELKA_SNVS.out.vcf - strelka_indels_vcf_gz = CONCAT_VCF_STRELKA_INDELS.out.vcf - - ch_versions = ch_versions.mix(BGZIP_STRELKA_SNVS.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_STRELKA_SNVS.out.versions) - } - - strelka_vcf = strelka_vcf.mix(strelka_snvs_vcf_gz,strelka_indels_vcf_gz) - } - - if (tools.contains('msisensorpro')) { - - MSISENSORPRO_MSI_SOMATIC( - cram_pair, - fasta, - msisensorpro_scan) - ch_versions = ch_versions.mix(MSISENSORPRO_MSI_SOMATIC.out.versions) - - msisensorpro_output = msisensorpro_output.mix(MSISENSORPRO_MSI_SOMATIC.out.report) - } - - if (tools.contains('mutect2')){ - cram_pair_intervals.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals -> - [meta, [normal_cram, tumor_cram], [normal_crai, tumor_crai], intervals, ['normal']] - }.set{cram_pair_mutect2} - - GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING( - cram_pair_mutect2, - fasta, - fasta_fai, - dict, - germline_resource, - germline_resource_tbi, - panel_of_normals, - panel_of_normals_tbi, - no_intervals, - num_intervals, - intervals_bed_combine_gz - ) - ch_versions = ch_versions.mix(GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING.out.versions) - } - - // if (tools.contains('tiddit')){ - // } - - emit: - versions = ch_versions - manta_vcf - strelka_vcf - mutect2_vcf - msisensorpro_output -} diff --git a/subworkflows/local/post_variantcalling/main.nf b/subworkflows/local/post_variantcalling/main.nf new file mode 100644 index 0000000000..63e6a251d4 --- /dev/null +++ b/subworkflows/local/post_variantcalling/main.nf @@ -0,0 +1,175 @@ +// +// POST VARIANT CALLING: processes run on variantcalled but not annotated VCFs +// +include { BCFTOOLS_VIEW as FILTER_VCFS } from '../../../modules/nf-core/bcftools/view' +include { CONCATENATE_GERMLINE_VCFS } from '../vcf_concatenate_germline' +include { CONSENSUS } from '../vcf_consensus' +include { 
NORMALIZE_VCFS } from '../vcf_normalization'
+include { VCF_VARLOCIRAPTOR_SINGLE as VCF_VARLOCIRAPTOR_GERMLINE } from '../vcf_varlociraptor_single'
+include { VCF_VARLOCIRAPTOR_SOMATIC } from '../vcf_varlociraptor_somatic'
+include { VCF_VARLOCIRAPTOR_SINGLE as VCF_VARLOCIRAPTOR_TUMOR_ONLY } from '../vcf_varlociraptor_single'
+
+//
+// POST_VARIANTCALLING runs exactly one of three mutually exclusive modes:
+//   1. 'varlociraptor' is listed in the comma-separated `tools` string:
+//      run the germline, somatic and tumor-only varlociraptor subworkflows and
+//      emit only their outputs.
+//   2. Any of filter_vcfs / normalize_vcfs / concatenate_vcfs is set:
+//      optionally filter, normalize, build a consensus (only when normalize_vcfs
+//      is set as well) and concatenate germline VCFs.
+//      snv_consensus_calling on its own does NOT select this mode.
+//   3. Otherwise: the input VCFs are passed through unchanged.
+//
+workflow POST_VARIANTCALLING {
+    take:
+    tools
+    cram_germline
+    germline_vcfs
+    germline_tbis
+    cram_tumor_only
+    tumor_only_vcfs
+    tumor_only_tbis
+    cram_somatic
+    somatic_vcfs
+    somatic_tbis
+    fasta
+    fai
+    concatenate_vcfs
+    filter_vcfs
+    snv_consensus_calling
+    normalize_vcfs
+    varlociraptor_chunk_size // integer: [mandatory] [default: 15] number of chunks to split BCF files when preprocessing and calling variants
+    varlociraptor_scenario_germline
+    varlociraptor_scenario_somatic
+    varlociraptor_scenario_tumor_only
+
+    main:
+    versions = Channel.empty()
+    vcfs = Channel.empty()
+    tbis = Channel.empty()
+
+    //
+    // VARLOCIRAPTOR
+    //
+    // `tools &&` guards against a null/empty tools value before splitting it
+    if (tools && tools.split(',').contains('varlociraptor')) {
+        // GERMLINE
+        VCF_VARLOCIRAPTOR_GERMLINE(cram_germline, fasta, fai, varlociraptor_scenario_germline, germline_vcfs, varlociraptor_chunk_size, 'normal')
+
+        vcfs = vcfs.mix(VCF_VARLOCIRAPTOR_GERMLINE.out.vcf)
+        tbis = tbis.mix(VCF_VARLOCIRAPTOR_GERMLINE.out.tbi)
+        versions = versions.mix(VCF_VARLOCIRAPTOR_GERMLINE.out.versions)
+
+        // SOMATIC
+        VCF_VARLOCIRAPTOR_SOMATIC(cram_somatic, fasta, fai, varlociraptor_scenario_somatic, somatic_vcfs, germline_vcfs, varlociraptor_chunk_size)
+
+        vcfs = vcfs.mix(VCF_VARLOCIRAPTOR_SOMATIC.out.vcf)
+        tbis = tbis.mix(VCF_VARLOCIRAPTOR_SOMATIC.out.tbi)
+        versions = versions.mix(VCF_VARLOCIRAPTOR_SOMATIC.out.versions)
+
+        // TUMOR ONLY
+        VCF_VARLOCIRAPTOR_TUMOR_ONLY(cram_tumor_only, fasta, fai, varlociraptor_scenario_tumor_only, tumor_only_vcfs, varlociraptor_chunk_size, 'tumor')
+
+        vcfs = vcfs.mix(VCF_VARLOCIRAPTOR_TUMOR_ONLY.out.vcf)
+        tbis = tbis.mix(VCF_VARLOCIRAPTOR_TUMOR_ONLY.out.tbi)
+        versions = versions.mix(VCF_VARLOCIRAPTOR_TUMOR_ONLY.out.versions)
+
+    } else if (filter_vcfs || normalize_vcfs || concatenate_vcfs ) {
+
+        // IMPORTANT: When adding new SNV variant callers to Sarek, add them to this list!
+        // This list determines which variant callers are eligible for:
+        // - VCF normalization (--normalize_vcfs)
+        // - VCF filtering (--filter_vcfs)
+        // - Consensus calling (--snv_consensus_calling)
+        //
+        // To find all variant callers: grep "variantcaller:" subworkflows/local/bam_variant_calling*/main.nf
+        //
+        // Excluded callers (not eligible for normalization/consensus):
+        // - manta, tiddit: structural variant callers (separate workflow)
+        // - samtools mpileup produces pileup format for ControlFREEC, not consensus-ready VCFs
+        def small_variantcallers = ['bcftools', 'deepvariant', 'freebayes', 'haplotypecaller',
+                                    'lofreq', 'muse', 'mutect2', 'sentieon_dnascope',
+                                    'sentieon_haplotyper', 'sentieon_tnscope', 'strelka' ]
+
+        def excluded_variantcallers = ['manta', 'tiddit', 'samtools']
+
+        // Split every incoming VCF/TBI by whether its meta.variantcaller is a known small-variant caller
+        all_vcfs = Channel.empty().mix(germline_vcfs, tumor_only_vcfs, somatic_vcfs)
+            .branch{ meta, vcf ->
+                small: small_variantcallers.contains(meta.variantcaller)
+                other: true
+            }
+
+        all_tbis = Channel.empty().mix(germline_tbis, tumor_only_tbis, somatic_tbis)
+            .branch{ meta, tbi ->
+                small: small_variantcallers.contains(meta.variantcaller)
+                other: true
+            }
+
+        // Validate that we're not silently excluding unknown variant callers
+        // The message names the list rather than line numbers, which go stale whenever this file is edited
+        all_vcfs.other.subscribe { meta, vcf ->
+            if (!excluded_variantcallers.contains(meta.variantcaller)) {
+                error("Variant caller '${meta.variantcaller}' is not in the small_variantcallers list and will be excluded from normalization/filtering/consensus. If this is a new SNV caller, please add it to the small_variantcallers list in subworkflows/local/post_variantcalling/main.nf")
+            }
+        }
+
+        // Needs to be reassigned to enable pass through reassignment below
+        // Due to strelka having multiple outputs, we are adding the file name (vcf.gz) for both here to make sure the right files are joined below
+        // (for an index file, baseName drops the trailing index extension, leaving the VCF file name)
+        small_variant_vcfs = all_vcfs.small.map{ meta, vcfs_ -> [meta + [filename: vcfs_.name], vcfs_]}
+        small_variant_tbis = all_tbis.small.map{ meta, tbis_ -> [meta + [filename: tbis_.baseName], tbis_]}
+
+        // 1. Filter by PASS and custom fields
+        // 2. Normalize
+        // 3. Aggregate variants (Union, intersection, or n-1)
+        if(filter_vcfs) {
+
+            // Join VCFs with their corresponding TBIs before filtering
+            FILTER_VCFS( small_variant_vcfs.join(small_variant_tbis, failOnDuplicate: true, failOnMismatch: true), [], [], [])
+
+            small_variant_vcfs = FILTER_VCFS.out.vcf
+            small_variant_tbis = FILTER_VCFS.out.tbi
+            versions = versions.mix(FILTER_VCFS.out.versions)
+        }
+
+        if (normalize_vcfs) {
+
+            NORMALIZE_VCFS(small_variant_vcfs, fasta)
+
+            small_variant_vcfs = NORMALIZE_VCFS.out.vcfs // [meta, vcf]
+            small_variant_tbis = NORMALIZE_VCFS.out.tbis // [meta, tbi]
+            versions = versions.mix(NORMALIZE_VCFS.out.versions)
+        }
+
+        // Consensus calling is only performed on normalized VCFs
+        if (normalize_vcfs && snv_consensus_calling){
+            // Preserve individual caller VCFs before consensus (for annotation)
+            individual_caller_vcfs = small_variant_vcfs
+            individual_caller_tbis = small_variant_tbis
+
+            CONSENSUS(small_variant_vcfs.join(small_variant_tbis, failOnDuplicate: true, failOnMismatch: true))
+
+            // Build a NEW meta map instead of assigning into the emitted one: a meta map
+            // object may be shared with other channel items, so mutating it in place could
+            // relabel the individual caller VCFs that are mixed back in below.
+            consensus_vcfs = CONSENSUS.out.vcfs.map { meta, vcfs_ ->
+                [meta + [variantcaller: 'consensus'], vcfs_]
+            }
+            consensus_tbis = CONSENSUS.out.tbis.map { meta, tbis_ ->
+                [meta + [variantcaller: 'consensus'], tbis_]
+            }
+
+            // Mix consensus VCF with individual caller VCFs for downstream annotation
+            small_variant_vcfs = consensus_vcfs.mix(individual_caller_vcfs)
+            small_variant_tbis = consensus_tbis.mix(individual_caller_tbis)
+
+            versions = versions.mix(CONSENSUS.out.versions)
+        }
+
+        // Re-attach everything that was not eligible for small-variant post-processing
+        vcfs = small_variant_vcfs.mix(all_vcfs.other)
+        tbis = small_variant_tbis.mix(all_tbis.other)
+
+        if (concatenate_vcfs) {
+            // Concatenation operates on the raw germline input, not on the filtered/normalized VCFs
+            CONCATENATE_GERMLINE_VCFS(germline_vcfs)
+
+            vcfs = vcfs.mix(CONCATENATE_GERMLINE_VCFS.out.vcfs)
+            tbis = tbis.mix(CONCATENATE_GERMLINE_VCFS.out.tbis)
+
+            versions = versions.mix(CONCATENATE_GERMLINE_VCFS.out.versions)
+        }
+
+
+    } else {
+        // No post-processing requested, pass through original VCFs
+        // NOTE(review): tbis stays empty in this mode - confirm no downstream consumer needs the indices here
+        vcfs = vcfs.mix(germline_vcfs,tumor_only_vcfs, somatic_vcfs)
+    }
+
+    emit:
+    vcfs // post processed vcfs [meta, vcf]
+    tbis // post processed tbis [meta, tbi]
+    versions // channel: [ versions.yml ]
+}
diff --git a/subworkflows/local/post_variantcalling/tests/main.nf.test b/subworkflows/local/post_variantcalling/tests/main.nf.test
new file mode 100644
index 0000000000..b19b1117e9
--- /dev/null
+++ b/subworkflows/local/post_variantcalling/tests/main.nf.test
@@ -0,0 +1,1075 @@
+nextflow_workflow {
+
+    name "Test Subworkflow POST_VARIANTCALLING"
+    script "../main.nf"
+    workflow "POST_VARIANTCALLING"
+    config "./nextflow.config"
+
+    tag "subworkflows"
+    tag "subworkflows_local"
+    tag "post_variantcalling"
+
+    test("POST_VARIANTCALLING - normalization only") {
+
+        when {
+            workflow {
+                """
+                // Input setup
+                tools = ''
+                cram_germline = [[:],[]]
+                germline_vcfs = channel.of([[id: 'test_normal'],
+                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)])
+                germline_tbis = Channel.empty()
+                cram_tumor_only = [[:],[]]
+                tumor_only_vcfs = channel.of([[id: 'test_tumor'],
+                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)])
+                tumor_only_tbis = Channel.empty()
+                cram_somatic = [[:],[]]
+                somatic_vcfs = channel.of([[id: 'test_somatic'],
+                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)])
+
somatic_tbis = Channel.empty() + fasta = channel.of([[id: 'fasta'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + fai = [[:],[]] + concatenate_vcfs = false + filter_vcfs = false + snv_consensus_calling = false + normalize_vcfs = true + varlociraptor_chunk_size = 15 + varlociraptor_scenario_germline = [] + varlociraptor_scenario_somatic = [] + varlociraptor_scenario_tumor_only = [] + + input[0] = tools + input[1] = cram_germline + input[2] = germline_vcfs + input[3] = germline_tbis + input[4] = cram_tumor_only + input[5] = tumor_only_vcfs + input[6] = tumor_only_tbis + input[7] = cram_somatic + input[8] = somatic_vcfs + input[9] = somatic_tbis + input[10] = fasta + input[11] = fai + input[12] = concatenate_vcfs + input[13] = filter_vcfs + input[14] = snv_consensus_calling + input[15] = normalize_vcfs + input[16] = varlociraptor_chunk_size + input[17] = varlociraptor_scenario_germline + input[18] = varlociraptor_scenario_somatic + input[19] = varlociraptor_scenario_tumor_only + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs, + workflow.out.tbis, + workflow.out.versions + ).match() } + ) + } + } + + test("POST_VARIANTCALLING - concatenation only") { + + when { + + workflow { + """ + // Input setup + tools = '' + cram_germline = Channel.empty() + germline_vcfs = Channel.of( + [[id: 'test_sample', num: '1'], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)], + [[id: 'test_sample', num: '2'], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)] + ) + germline_tbis = Channel.empty() + cram_tumor_only = Channel.empty() + tumor_only_vcfs = Channel.empty() + tumor_only_tbis = Channel.empty() + cram_somatic = Channel.empty() + somatic_vcfs = Channel.empty() + somatic_tbis = Channel.empty() + fasta = Channel.of([[id: 'fasta'], 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + fai = Channel.of([[id: 'fai'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + concatenate_vcfs = true + filter_vcfs = false + snv_consensus_calling = false + normalize_vcfs = false + varlociraptor_chunk_size = 15 + varlociraptor_scenario_germline = [] + varlociraptor_scenario_somatic = [] + varlociraptor_scenario_tumor_only = [] + + input[0] = tools + input[1] = cram_germline + input[2] = germline_vcfs + input[3] = germline_tbis + input[4] = cram_tumor_only + input[5] = tumor_only_vcfs + input[6] = tumor_only_tbis + input[7] = cram_somatic + input[8] = somatic_vcfs + input[9] = somatic_tbis + input[10] = fasta + input[11] = fai + input[12] = concatenate_vcfs + input[13] = filter_vcfs + input[14] = snv_consensus_calling + input[15] = normalize_vcfs + input[16] = varlociraptor_chunk_size + input[17] = varlociraptor_scenario_germline + input[18] = varlociraptor_scenario_somatic + input[19] = varlociraptor_scenario_tumor_only + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs, + workflow.out.tbis, + workflow.out.versions + ).match() } + ) + } + } + + test("POST_VARIANTCALLING - normalization and concatenation") { + + when { + + workflow { + """ + // Input setup + tools = '' + cram_germline = Channel.empty() + germline_vcfs = Channel.of( + [[id: 'test_sample', num: '1'], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)], + [[id: 'test_sample', num: '2'], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)] + ) + germline_tbis = Channel.empty() + cram_tumor_only = Channel.empty() + tumor_only_vcfs = Channel.empty() + tumor_only_tbis = Channel.empty() + cram_somatic = Channel.empty() + somatic_vcfs = Channel.empty() + somatic_tbis = 
Channel.empty() + fasta = Channel.of([[id: 'fasta'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + fai = Channel.of([[id: 'fai'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + concatenate_vcfs = true + filter_vcfs = false + snv_consensus_calling = false + normalize_vcfs = true + varlociraptor_chunk_size = 15 + varlociraptor_scenario_germline = [] + varlociraptor_scenario_somatic = [] + varlociraptor_scenario_tumor_only = [] + + input[0] = tools + input[1] = cram_germline + input[2] = germline_vcfs + input[3] = germline_tbis + input[4] = cram_tumor_only + input[5] = tumor_only_vcfs + input[6] = tumor_only_tbis + input[7] = cram_somatic + input[8] = somatic_vcfs + input[9] = somatic_tbis + input[10] = fasta + input[11] = fai + input[12] = concatenate_vcfs + input[13] = filter_vcfs + input[14] = snv_consensus_calling + input[15] = normalize_vcfs + input[16] = varlociraptor_chunk_size + input[17] = varlociraptor_scenario_germline + input[18] = varlociraptor_scenario_somatic + input[19] = varlociraptor_scenario_tumor_only + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs, + workflow.out.tbis, + workflow.out.versions + ).match() } + ) + } + } + + test("POST_VARIANTCALLING - no processing") { + + when { + + workflow { + """ + // Input setup + tools = 'strelka' + cram_germline = Channel.empty() + germline_vcfs = Channel.of( + [[id: 'test_sample'], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)] + ) + germline_tbis = Channel.empty() + cram_tumor_only = Channel.empty() + tumor_only_vcfs = Channel.empty() + tumor_only_tbis = Channel.empty() + cram_somatic = Channel.empty() + somatic_vcfs = Channel.empty() + somatic_tbis = Channel.empty() + fasta = Channel.of([[id: 'fasta'], file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + fai = Channel.of([[id: 'fai'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + concatenate_vcfs = false + filter_vcfs = false + snv_consensus_calling = false + normalize_vcfs = false + varlociraptor_chunk_size = 15 + varlociraptor_scenario_germline = [] + varlociraptor_scenario_somatic = [] + varlociraptor_scenario_tumor_only = [] + + input[0] = tools + input[1] = cram_germline + input[2] = germline_vcfs + input[3] = germline_tbis + input[4] = cram_tumor_only + input[5] = tumor_only_vcfs + input[6] = tumor_only_tbis + input[7] = cram_somatic + input[8] = somatic_vcfs + input[9] = somatic_tbis + input[10] = fasta + input[11] = fai + input[12] = concatenate_vcfs + input[13] = filter_vcfs + input[14] = snv_consensus_calling + input[15] = normalize_vcfs + input[16] = varlociraptor_chunk_size + input[17] = varlociraptor_scenario_germline + input[18] = varlociraptor_scenario_somatic + input[19] = varlociraptor_scenario_tumor_only + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.out.vcfs.size() == 0 }, + { assert workflow.out.tbis.size() == 0 } + ) + } + } + + test("POST_VARIANTCALLING - normalization only - stub") { + + options "-stub" + + when { + + workflow { + """ + // Input setup + tools = 'strelka' + cram_germline = Channel.empty() + germline_vcfs = Channel.of( + [[id: 'test_sample'], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)] + ) + germline_tbis = Channel.empty() + cram_tumor_only = Channel.empty() + tumor_only_vcfs = Channel.empty() + tumor_only_tbis = Channel.empty() + cram_somatic = Channel.empty() + somatic_vcfs = Channel.empty() + somatic_tbis = Channel.empty() + fasta = Channel.of([[id: 'fasta'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + fai = Channel.of([[id: 
'fai'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + concatenate_vcfs = false + filter_vcfs = false + snv_consensus_calling = false + normalize_vcfs = true + varlociraptor_chunk_size = 15 + varlociraptor_scenario_germline = [] + varlociraptor_scenario_somatic = [] + varlociraptor_scenario_tumor_only = [] + + input[0] = tools + input[1] = cram_germline + input[2] = germline_vcfs + input[3] = germline_tbis + input[4] = cram_tumor_only + input[5] = tumor_only_vcfs + input[6] = tumor_only_tbis + input[7] = cram_somatic + input[8] = somatic_vcfs + input[9] = somatic_tbis + input[10] = fasta + input[11] = fai + input[12] = concatenate_vcfs + input[13] = filter_vcfs + input[14] = snv_consensus_calling + input[15] = normalize_vcfs + input[16] = varlociraptor_chunk_size + input[17] = varlociraptor_scenario_germline + input[18] = varlociraptor_scenario_somatic + input[19] = varlociraptor_scenario_tumor_only + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs, + workflow.out.tbis, + workflow.out.versions + ).match() } + ) + } + } + + test("POST_VARIANTCALLING - concatenation only - stub") { + + options "-stub" + + when { + + workflow { + """ + // Input setup + tools = 'haplotypecaller,strelka' + cram_germline = Channel.empty() + germline_vcfs = Channel.of( + [[id: 'test_sample'], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)] + ) + germline_tbis = Channel.empty() + cram_tumor_only = Channel.empty() + tumor_only_vcfs = Channel.empty() + tumor_only_tbis = Channel.empty() + cram_somatic = Channel.empty() + somatic_vcfs = Channel.empty() + somatic_tbis = Channel.empty() + fasta = Channel.of([[id: 'fasta'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + fai = Channel.of([[id: 'fai'], file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + concatenate_vcfs = true + filter_vcfs = false + snv_consensus_calling = false + normalize_vcfs = false + varlociraptor_chunk_size = 15 + varlociraptor_scenario_germline = [] + varlociraptor_scenario_somatic = [] + varlociraptor_scenario_tumor_only = [] + + input[0] = tools + input[1] = cram_germline + input[2] = germline_vcfs + input[3] = germline_tbis + input[4] = cram_tumor_only + input[5] = tumor_only_vcfs + input[6] = tumor_only_tbis + input[7] = cram_somatic + input[8] = somatic_vcfs + input[9] = somatic_tbis + input[10] = fasta + input[11] = fai + input[12] = concatenate_vcfs + input[13] = filter_vcfs + input[14] = snv_consensus_calling + input[15] = normalize_vcfs + input[16] = varlociraptor_chunk_size + input[17] = varlociraptor_scenario_germline + input[18] = varlociraptor_scenario_somatic + input[19] = varlociraptor_scenario_tumor_only + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs, + workflow.out.tbis, + workflow.out.versions + ).match() } + ) + } + } + + test("POST_VARIANTCALLING - varlociraptor germline") { + + tag "varlociraptor" + + when { + params{ + varlociraptor_chunk_size = 1 + } + workflow { + """ + // Input setup with real test data + tools = 'varlociraptor' + cram_germline = Channel.of( + [[id: 'test_sample', patient: 'test', sample: 'test', sex: 'XX', status: 0], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists:true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists:true)] + ) + germline_vcfs = Channel.of( + [[id: 'test_sample', patient: 'test', sample: 'test', sex: 'XX', status: 0, variantcaller: 'strelka'], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)] + ) + germline_tbis = Channel.empty() + cram_tumor_only = 
Channel.empty() + tumor_only_vcfs = Channel.empty() + tumor_only_tbis = Channel.empty() + cram_somatic = Channel.empty() + somatic_vcfs = Channel.empty() + somatic_tbis = Channel.empty() + fasta = Channel.of([[id: 'fasta'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + fai = Channel.of([[id: 'fai'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + concatenate_vcfs = false + filter_vcfs = false + snv_consensus_calling = false + normalize_vcfs = false + varlociraptor_chunk_size = params.varlociraptor_chunk_size + varlociraptor_scenario_germline = Channel.fromPath("${projectDir}/assets/varlociraptor_germline.yte.yaml").collect() + varlociraptor_scenario_somatic = Channel.fromPath("${projectDir}/assets/varlociraptor_somatic.yte.yaml").collect() + varlociraptor_scenario_tumor_only = Channel.fromPath("${projectDir}/assets/varlociraptor_tumor_only.yte.yaml").collect() + + input[0] = tools + input[1] = cram_germline + input[2] = germline_vcfs + input[3] = germline_tbis + input[4] = cram_tumor_only + input[5] = tumor_only_vcfs + input[6] = tumor_only_tbis + input[7] = cram_somatic + input[8] = somatic_vcfs + input[9] = somatic_tbis + input[10] = fasta + input[11] = fai + input[12] = concatenate_vcfs + input[13] = filter_vcfs + input[14] = snv_consensus_calling + input[15] = normalize_vcfs + input[16] = varlociraptor_chunk_size + input[17] = varlociraptor_scenario_germline + input[18] = varlociraptor_scenario_somatic + input[19] = varlociraptor_scenario_tumor_only + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs, + workflow.out.tbis, + workflow.out.versions + ).match() } + ) + } + } + + test("POST_VARIANTCALLING - varlociraptor germline - stub") { + + options "-stub" + + tag "varlociraptor" + + when { + workflow { + """ + // Input setup + tools = 'varlociraptor' + cram_germline = Channel.of( + 
[[id: 'test_sample', patient: 'test', sample: 'test', sex: 'XX', status: 0], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists:true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists:true)] + ) + germline_vcfs = Channel.of( + [[id: 'test_sample', patient: 'test', sample: 'test', sex: 'XX', status: 0, variantcaller: 'strelka'], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)] + ) + germline_tbis = Channel.empty() + cram_tumor_only = Channel.empty() + tumor_only_vcfs = Channel.empty() + tumor_only_tbis = Channel.empty() + cram_somatic = Channel.empty() + somatic_vcfs = Channel.empty() + somatic_tbis = Channel.empty() + fasta = Channel.of([[id: 'fasta'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + fai = Channel.of([[id: 'fai'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + concatenate_vcfs = false + filter_vcfs = false + snv_consensus_calling = false + normalize_vcfs = false + varlociraptor_chunk_size = 2 + varlociraptor_scenario_germline = Channel.fromPath("${projectDir}/assets/varlociraptor_germline.yte.yaml").collect() + varlociraptor_scenario_somatic = Channel.fromPath("${projectDir}/assets/varlociraptor_somatic.yte.yaml").collect() + varlociraptor_scenario_tumor_only = Channel.fromPath("${projectDir}/assets/varlociraptor_tumor_only.yte.yaml").collect() + + input[0] = tools + input[1] = cram_germline + input[2] = germline_vcfs + input[3] = germline_tbis + input[4] = cram_tumor_only + input[5] = tumor_only_vcfs + input[6] = tumor_only_tbis + input[7] = cram_somatic + input[8] = somatic_vcfs + input[9] = somatic_tbis + input[10] = fasta + input[11] = fai + input[12] = concatenate_vcfs + input[13] = filter_vcfs + input[14] = snv_consensus_calling + 
input[15] = normalize_vcfs + input[16] = varlociraptor_chunk_size + input[17] = varlociraptor_scenario_germline + input[18] = varlociraptor_scenario_somatic + input[19] = varlociraptor_scenario_tumor_only + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs, + workflow.out.tbis, + workflow.out.versions + ).match() } + ) + } + } + + test("POST_VARIANTCALLING - varlociraptor tumor-only") { + + tag "varlociraptor" + + when { + params{ + varlociraptor_chunk_size = 1 + } + workflow { + """ + // Input setup with real test data + tools = 'varlociraptor' + cram_germline = Channel.empty() + germline_vcfs = Channel.empty() + germline_tbis = Channel.empty() + cram_tumor_only = Channel.of( + [[id: 'tumor_sample', patient: 'test', sample: 'tumor', sex: 'XY', status: 1, contamination: 0.0], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists:true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists:true)] + ) + tumor_only_vcfs = Channel.of( + [[id: 'tumor_sample', patient: 'test', sample: 'tumor', sex: 'XY', status: 1, variantcaller: 'strelka'], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)] + ) + tumor_only_tbis = Channel.empty() + cram_somatic = Channel.empty() + somatic_vcfs = Channel.empty() + somatic_tbis = Channel.empty() + fasta = Channel.of([[id: 'fasta'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + fai = Channel.of([[id: 'fai'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + concatenate_vcfs = false + filter_vcfs = false + snv_consensus_calling = false + normalize_vcfs = false + varlociraptor_chunk_size = params.varlociraptor_chunk_size + varlociraptor_scenario_germline = 
Channel.fromPath("${projectDir}/assets/varlociraptor_germline.yte.yaml").collect() + varlociraptor_scenario_somatic = Channel.fromPath("${projectDir}/assets/varlociraptor_somatic.yte.yaml").collect() + varlociraptor_scenario_tumor_only = Channel.fromPath("${projectDir}/assets/varlociraptor_tumor_only.yte.yaml").collect() + + input[0] = tools + input[1] = cram_germline + input[2] = germline_vcfs + input[3] = germline_tbis + input[4] = cram_tumor_only + input[5] = tumor_only_vcfs + input[6] = tumor_only_tbis + input[7] = cram_somatic + input[8] = somatic_vcfs + input[9] = somatic_tbis + input[10] = fasta + input[11] = fai + input[12] = concatenate_vcfs + input[13] = filter_vcfs + input[14] = snv_consensus_calling + input[15] = normalize_vcfs + input[16] = varlociraptor_chunk_size + input[17] = varlociraptor_scenario_germline + input[18] = varlociraptor_scenario_somatic + input[19] = varlociraptor_scenario_tumor_only + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs, + workflow.out.tbis, + workflow.out.versions + ).match() } + ) + } + } + + test("POST_VARIANTCALLING - varlociraptor tumor-only - stub") { + + options "-stub" + tag "varlociraptor" + + when { + workflow { + """ + // Input setup + tools = 'varlociraptor' + cram_germline = Channel.empty() + germline_vcfs = Channel.empty() + germline_tbis = Channel.empty() + cram_tumor_only = Channel.of( + [[id: 'tumor_sample', patient: 'test', sample: 'tumor', sex: 'XY', status: 1, contamination: 0.0], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists:true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists:true)] + ) + tumor_only_vcfs = Channel.of( + [[id: 'tumor_sample', patient: 'test', sample: 'tumor', sex: 'XY', status: 1, variantcaller: 'strelka'], file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)] + ) + tumor_only_tbis = Channel.empty() + cram_somatic = Channel.empty() + somatic_vcfs = Channel.empty() + somatic_tbis = Channel.empty() + fasta = Channel.of([[id: 'fasta'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + fai = Channel.of([[id: 'fai'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + concatenate_vcfs = false + filter_vcfs = false + snv_consensus_calling = false + normalize_vcfs = false + varlociraptor_chunk_size = 2 + varlociraptor_scenario_germline = Channel.fromPath("${projectDir}/assets/varlociraptor_germline.yte.yaml").collect() + varlociraptor_scenario_somatic = Channel.fromPath("${projectDir}/assets/varlociraptor_somatic.yte.yaml").collect() + varlociraptor_scenario_tumor_only = Channel.fromPath("${projectDir}/assets/varlociraptor_tumor_only.yte.yaml").collect() + + input[0] = tools + input[1] = cram_germline + input[2] = germline_vcfs + input[3] = germline_tbis + input[4] = cram_tumor_only + input[5] = tumor_only_vcfs + input[6] = tumor_only_tbis + input[7] = cram_somatic + input[8] = somatic_vcfs + input[9] = somatic_tbis + input[10] = fasta + input[11] = fai + input[12] = concatenate_vcfs + input[13] = filter_vcfs + input[14] = snv_consensus_calling + input[15] = normalize_vcfs + input[16] = varlociraptor_chunk_size + input[17] = varlociraptor_scenario_germline + input[18] = varlociraptor_scenario_somatic + input[19] = varlociraptor_scenario_tumor_only + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs, + workflow.out.tbis, + workflow.out.versions + ).match() } + ) + } + } + + test("POST_VARIANTCALLING - varlociraptor somatic") { + + tag "varlociraptor" + + when { + params{ + varlociraptor_chunk_size = 1 + } + workflow { + """ + // Input setup with real test data + tools = 'varlociraptor' + 
cram_germline = Channel.empty() + germline_vcfs = Channel.of( + [[id: 'normal_sample', patient: 'test', sample: 'normal', sex: 'XX', status: 0, variantcaller: 'strelka'], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz', checkIfExists: true)] + ) + germline_tbis = Channel.empty() + cram_tumor_only = Channel.empty() + tumor_only_vcfs = Channel.empty() + tumor_only_tbis = Channel.empty() + cram_somatic = Channel.of( + + [[id: 'tumor_sample', patient: 'test', sample: 'tumor', sex: 'XX', status: 1, normal_id: 'normal_sample', contamination: 0.0], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists:true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists:true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true)] + ) + somatic_vcfs = Channel.of( + [[id: 'tumor_sample', patient: 'test', sample: 'tumor', sex: 'XX', status: 1, normal_id: 'normal_sample', variantcaller: 'strelka'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_filtered_mutect2_calls.vcf.gz', checkIfExists: true)] + ) + somatic_tbis = Channel.empty() + fasta = Channel.of([[id: 'fasta'], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)]) + fai = Channel.of([[id: 'fai'], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true)]) + concatenate_vcfs = false + filter_vcfs = false + snv_consensus_calling = false + 
normalize_vcfs = false + varlociraptor_chunk_size = params.varlociraptor_chunk_size + varlociraptor_scenario_germline = Channel.fromPath("${projectDir}/assets/varlociraptor_germline.yte.yaml").collect() + varlociraptor_scenario_somatic = Channel.fromPath("${projectDir}/assets/varlociraptor_somatic.yte.yaml").collect() + varlociraptor_scenario_tumor_only = Channel.fromPath("${projectDir}/assets/varlociraptor_tumor_only.yte.yaml").collect() + + input[0] = tools + input[1] = cram_germline + input[2] = germline_vcfs + input[3] = germline_tbis + input[4] = cram_tumor_only + input[5] = tumor_only_vcfs + input[6] = tumor_only_tbis + input[7] = cram_somatic + input[8] = somatic_vcfs + input[9] = somatic_tbis + input[10] = fasta + input[11] = fai + input[12] = concatenate_vcfs + input[13] = filter_vcfs + input[14] = snv_consensus_calling + input[15] = normalize_vcfs + input[16] = varlociraptor_chunk_size + input[17] = varlociraptor_scenario_germline + input[18] = varlociraptor_scenario_somatic + input[19] = varlociraptor_scenario_tumor_only + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs, + workflow.out.tbis, + workflow.out.versions + ).match() } + ) + } + } + + test("POST_VARIANTCALLING - varlociraptor somatic - stub") { + + options "-stub" + + tag "varlociraptor" + + when { + + workflow { + """ + // Input setup + tools = 'varlociraptor' + cram_germline = Channel.empty() + germline_vcfs = Channel.of( + [[id: 'normal_sample', patient: 'test', sample: 'normal', sex: 'XX', status: 0, variantcaller: 'strelka'], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz', checkIfExists: true)] + ) + germline_tbis = Channel.empty() + cram_tumor_only = Channel.empty() + tumor_only_vcfs = Channel.empty() + tumor_only_tbis = Channel.empty() + cram_somatic = Channel.of( + [[id: 'tumor_sample', patient: 'test', sample: 'tumor', sex: 'XX', status: 1, normal_id: 
'normal_sample', contamination: 0.0], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists:true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists:true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true)] + ) + somatic_vcfs = Channel.of( + [[id: 'tumor_sample', patient: 'test', sample: 'tumor', sex: 'XX', status: 1, normal_id: 'normal_sample', variantcaller: 'strelka'], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_filtered_mutect2_calls.vcf.gz', checkIfExists: true)] + ) + somatic_tbis = Channel.empty() + fasta = Channel.of([[id: 'fasta'], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)]) + fai = Channel.of([[id: 'fai'], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)]) + concatenate_vcfs = false + filter_vcfs = false + snv_consensus_calling = false + normalize_vcfs = false + varlociraptor_chunk_size = 2 + varlociraptor_scenario_germline = Channel.fromPath("${projectDir}/assets/varlociraptor_germline.yte.yaml").collect() + varlociraptor_scenario_somatic = Channel.fromPath("${projectDir}/assets/varlociraptor_somatic.yte.yaml").collect() + varlociraptor_scenario_tumor_only = Channel.fromPath("${projectDir}/assets/varlociraptor_tumor_only.yte.yaml").collect() + + input[0] = tools + input[1] = cram_germline + input[2] = germline_vcfs + input[3] = germline_tbis + input[4] = cram_tumor_only + input[5] = tumor_only_vcfs + input[6] = tumor_only_tbis + input[7] = cram_somatic + 
input[8] = somatic_vcfs + input[9] = somatic_tbis + input[10] = fasta + input[11] = fai + input[12] = concatenate_vcfs + input[13] = filter_vcfs + input[14] = snv_consensus_calling + input[15] = normalize_vcfs + input[16] = varlociraptor_chunk_size + input[17] = varlociraptor_scenario_germline + input[18] = varlociraptor_scenario_somatic + input[19] = varlociraptor_scenario_tumor_only + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs, + workflow.out.tbis, + workflow.out.versions + ).match() } + ) + } + } + + test("POST_VARIANTCALLING - normalization and consensus") { + + when { + workflow { + """ + // Input setup for consensus calling - requires multiple VCFs with different variantcallers + tools = '' + cram_germline = Channel.empty() + germline_vcfs = Channel.of( + [[id: 'test_sample', patient: 'test', sample: 'test', sex: 'XX', status: 0, variantcaller: 'freebayes'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)], + [[id: 'test_sample', patient: 'test', sample: 'test', sex: 'XX', status: 0, variantcaller: 'strelka'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)] + ) + germline_tbis = Channel.of( + [[id: 'test_sample', patient: 'test', sample: 'test', sex: 'XX', status: 0, variantcaller: 'freebayes'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)], + [[id: 'test_sample', patient: 'test', sample: 'test', sex: 'XX', status: 0, variantcaller: 'strelka'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)] + ) + cram_tumor_only = Channel.empty() + tumor_only_vcfs = Channel.empty() + tumor_only_tbis = Channel.empty() + cram_somatic = Channel.empty() + somatic_vcfs = Channel.empty() + somatic_tbis = Channel.empty() + fasta = Channel.of([[id: 'fasta'], 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + fai = Channel.of([[id: 'fai'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + concatenate_vcfs = false + filter_vcfs = false + snv_consensus_calling = true + normalize_vcfs = true + varlociraptor_chunk_size = 15 + varlociraptor_scenario_germline = [] + varlociraptor_scenario_somatic = [] + varlociraptor_scenario_tumor_only = [] + + input[0] = tools + input[1] = cram_germline + input[2] = germline_vcfs + input[3] = germline_tbis + input[4] = cram_tumor_only + input[5] = tumor_only_vcfs + input[6] = tumor_only_tbis + input[7] = cram_somatic + input[8] = somatic_vcfs + input[9] = somatic_tbis + input[10] = fasta + input[11] = fai + input[12] = concatenate_vcfs + input[13] = filter_vcfs + input[14] = snv_consensus_calling + input[15] = normalize_vcfs + input[16] = varlociraptor_chunk_size + input[17] = varlociraptor_scenario_germline + input[18] = varlociraptor_scenario_somatic + input[19] = varlociraptor_scenario_tumor_only + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs, + workflow.out.tbis, + workflow.out.versions + ).match() } + ) + } + } + + test("POST_VARIANTCALLING - filter, normalization and consensus") { + + when { + workflow { + """ + // Input setup for full post-processing pipeline + tools = '' + cram_germline = Channel.empty() + germline_vcfs = Channel.of( + [[id: 'test_sample', patient: 'test', sample: 'test', sex: 'XX', status: 0, variantcaller: 'freebayes'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)], + [[id: 'test_sample', patient: 'test', sample: 'test', sex: 'XX', status: 0, variantcaller: 'haplotypecaller'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)] + ) + germline_tbis = Channel.of( + 
[[id: 'test_sample', patient: 'test', sample: 'test', sex: 'XX', status: 0, variantcaller: 'freebayes'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)], + [[id: 'test_sample', patient: 'test', sample: 'test', sex: 'XX', status: 0, variantcaller: 'haplotypecaller'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)] + ) + cram_tumor_only = Channel.empty() + tumor_only_vcfs = Channel.empty() + tumor_only_tbis = Channel.empty() + cram_somatic = Channel.empty() + somatic_vcfs = Channel.empty() + somatic_tbis = Channel.empty() + fasta = Channel.of([[id: 'fasta'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + fai = Channel.of([[id: 'fai'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + concatenate_vcfs = false + filter_vcfs = true + snv_consensus_calling = true + normalize_vcfs = true + varlociraptor_chunk_size = 15 + varlociraptor_scenario_germline = [] + varlociraptor_scenario_somatic = [] + varlociraptor_scenario_tumor_only = [] + + input[0] = tools + input[1] = cram_germline + input[2] = germline_vcfs + input[3] = germline_tbis + input[4] = cram_tumor_only + input[5] = tumor_only_vcfs + input[6] = tumor_only_tbis + input[7] = cram_somatic + input[8] = somatic_vcfs + input[9] = somatic_tbis + input[10] = fasta + input[11] = fai + input[12] = concatenate_vcfs + input[13] = filter_vcfs + input[14] = snv_consensus_calling + input[15] = normalize_vcfs + input[16] = varlociraptor_chunk_size + input[17] = varlociraptor_scenario_germline + input[18] = varlociraptor_scenario_somatic + input[19] = varlociraptor_scenario_tumor_only + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs, + workflow.out.tbis, + workflow.out.versions + ).match() } + ) + } + } + + 
test("POST_VARIANTCALLING - normalization and consensus - stub") { + + options "-stub" + + when { + workflow { + """ + // Input setup for consensus stub test + tools = '' + cram_germline = Channel.empty() + germline_vcfs = Channel.of( + [[id: 'test_sample', patient: 'test', sample: 'test', sex: 'XX', status: 0, variantcaller: 'freebayes'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)], + [[id: 'test_sample', patient: 'test', sample: 'test', sex: 'XX', status: 0, variantcaller: 'strelka'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)] + ) + germline_tbis = Channel.of( + [[id: 'test_sample', patient: 'test', sample: 'test', sex: 'XX', status: 0, variantcaller: 'freebayes'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)], + [[id: 'test_sample', patient: 'test', sample: 'test', sex: 'XX', status: 0, variantcaller: 'strelka'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)] + ) + cram_tumor_only = Channel.empty() + tumor_only_vcfs = Channel.empty() + tumor_only_tbis = Channel.empty() + cram_somatic = Channel.empty() + somatic_vcfs = Channel.empty() + somatic_tbis = Channel.empty() + fasta = Channel.of([[id: 'fasta'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + fai = Channel.of([[id: 'fai'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]) + concatenate_vcfs = false + filter_vcfs = false + snv_consensus_calling = true + normalize_vcfs = true + varlociraptor_chunk_size = 15 + varlociraptor_scenario_germline = [] + varlociraptor_scenario_somatic = [] + varlociraptor_scenario_tumor_only = [] + + input[0] = tools + input[1] = cram_germline + input[2] = germline_vcfs + input[3] = germline_tbis + 
input[4] = cram_tumor_only + input[5] = tumor_only_vcfs + input[6] = tumor_only_tbis + input[7] = cram_somatic + input[8] = somatic_vcfs + input[9] = somatic_tbis + input[10] = fasta + input[11] = fai + input[12] = concatenate_vcfs + input[13] = filter_vcfs + input[14] = snv_consensus_calling + input[15] = normalize_vcfs + input[16] = varlociraptor_chunk_size + input[17] = varlociraptor_scenario_germline + input[18] = varlociraptor_scenario_somatic + input[19] = varlociraptor_scenario_tumor_only + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.vcfs, + workflow.out.tbis, + workflow.out.versions + ).match() } + ) + } + } +} diff --git a/subworkflows/local/post_variantcalling/tests/main.nf.test.snap b/subworkflows/local/post_variantcalling/tests/main.nf.test.snap new file mode 100644 index 0000000000..fefd5b66a9 --- /dev/null +++ b/subworkflows/local/post_variantcalling/tests/main.nf.test.snap @@ -0,0 +1,459 @@ +{ + "POST_VARIANTCALLING - concatenation only": { + "content": [ + [ + [ + { + "id": "test_sample", + "num": "1" + }, + "test_sample.germline.vcf.gz:md5,64dc493e217ce2c6124c464fc5349e21" + ], + [ + { + "id": "test_sample", + "num": "2" + }, + "test_sample.germline.vcf.gz:md5,64dc493e217ce2c6124c464fc5349e21" + ] + ], + [ + [ + { + "id": "test_sample", + "num": "1" + }, + "test_sample.germline.vcf.gz.tbi:md5,eda316880fe501671a66e26aefa7abf6" + ], + [ + { + "id": "test_sample", + "num": "2" + }, + "test_sample.germline.vcf.gz.tbi:md5,eda316880fe501671a66e26aefa7abf6" + ] + ], + [ + "versions.yml:md5,047305afd4d6a35ccc6c8cae2d0de0cb", + "versions.yml:md5,047305afd4d6a35ccc6c8cae2d0de0cb", + "versions.yml:md5,594bb4ec971ba178d11d6f4c32fa5398", + "versions.yml:md5,594bb4ec971ba178d11d6f4c32fa5398", + "versions.yml:md5,b5b952ec9c95cc575f4305901a4af77c", + "versions.yml:md5,b5b952ec9c95cc575f4305901a4af77c", + "versions.yml:md5,eeffdab3a5d087ebcc8fe19bd27c900f", + 
"versions.yml:md5,eeffdab3a5d087ebcc8fe19bd27c900f", + "versions.yml:md5,f9826bc6c76002549649f51bda91a223", + "versions.yml:md5,f9826bc6c76002549649f51bda91a223" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T15:20:03.460554" + }, + "POST_VARIANTCALLING - normalization and concatenation": { + "content": [ + [ + [ + { + "id": "test_sample", + "num": "1" + }, + "test_sample.germline.vcf.gz:md5,28c41b6f6a4856cbdace4242fb0f7f36" + ], + [ + { + "id": "test_sample", + "num": "1" + }, + "test_sample.null.norm.vcf.gz:md5,85763328c6ca66c55fe00a4496705b48" + ], + [ + { + "id": "test_sample", + "num": "2" + }, + "test_sample.germline.vcf.gz:md5,28c41b6f6a4856cbdace4242fb0f7f36" + ] + ], + [ + [ + { + "id": "test_sample", + "num": "1" + }, + "test_sample.germline.vcf.gz.tbi:md5,eda316880fe501671a66e26aefa7abf6" + ], + [ + { + "id": "test_sample", + "num": "1" + }, + "test_sample.null.norm.vcf.gz.tbi:md5,4331f1e4e2ba3cd35241c62e6a3e3a69" + ], + [ + { + "id": "test_sample", + "num": "2" + }, + "test_sample.germline.vcf.gz.tbi:md5,eda316880fe501671a66e26aefa7abf6" + ] + ], + [ + "versions.yml:md5,047305afd4d6a35ccc6c8cae2d0de0cb", + "versions.yml:md5,047305afd4d6a35ccc6c8cae2d0de0cb", + "versions.yml:md5,594bb4ec971ba178d11d6f4c32fa5398", + "versions.yml:md5,594bb4ec971ba178d11d6f4c32fa5398", + "versions.yml:md5,6ea9087108ce40c482f810d350240e2e", + "versions.yml:md5,8e5af507f2006539314960cc13610d2c", + "versions.yml:md5,af9213a1434d504b9b19d0236f508046", + "versions.yml:md5,af9213a1434d504b9b19d0236f508046", + "versions.yml:md5,b5b952ec9c95cc575f4305901a4af77c", + "versions.yml:md5,b5b952ec9c95cc575f4305901a4af77c", + "versions.yml:md5,dee58c88dd03c46f90d7b83578b4ae82", + "versions.yml:md5,dee58c88dd03c46f90d7b83578b4ae82", + "versions.yml:md5,e79c4d48ba8ecd05bbd54969fa63fd0a", + "versions.yml:md5,eeffdab3a5d087ebcc8fe19bd27c900f", + "versions.yml:md5,eeffdab3a5d087ebcc8fe19bd27c900f", + 
"versions.yml:md5,f9826bc6c76002549649f51bda91a223", + "versions.yml:md5,f9826bc6c76002549649f51bda91a223" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T15:20:28.455488" + }, + "POST_VARIANTCALLING - concatenation only - stub": { + "content": [ + [ + [ + { + "id": "test_sample" + }, + "test_sample.germline.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + [ + { + "id": "test_sample" + }, + "test_sample.germline.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + "versions.yml:md5,047305afd4d6a35ccc6c8cae2d0de0cb", + "versions.yml:md5,594bb4ec971ba178d11d6f4c32fa5398", + "versions.yml:md5,b5b952ec9c95cc575f4305901a4af77c", + "versions.yml:md5,eeffdab3a5d087ebcc8fe19bd27c900f", + "versions.yml:md5,f9826bc6c76002549649f51bda91a223" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T13:52:44.472169" + }, + "POST_VARIANTCALLING - varlociraptor tumor-only": { + "content": [ + [ + [ + { + "id": "tumor_sample", + "patient": "test", + "sample": "tumor", + "sex": "XY", + "status": 1, + "contamination": 0.0, + "variantcaller": "strelka", + "postprocess": "varlociraptor", + "chunk": "0" + }, + "tumor_sample.strelka.tumor_only.varlociraptor.vcf.gz:md5,5f66b36044623154851ebc745607a0aa" + ] + ], + [ + [ + { + "id": "tumor_sample", + "patient": "test", + "sample": "tumor", + "sex": "XY", + "status": 1, + "contamination": 0.0, + "variantcaller": "strelka", + "postprocess": "varlociraptor", + "chunk": "0" + }, + "tumor_sample.strelka.tumor_only.varlociraptor.vcf.gz.tbi:md5,6f17310cd152968349269107e53371c2" + ] + ], + [ + "versions.yml:md5,2944a0f9af0e3918bb9833c87df1ebb6", + "versions.yml:md5,29f73f6b28ae158785ebea38843af786", + "versions.yml:md5,5d2a1f6080254676bff1ef3b27ab641f", + "versions.yml:md5,c35cd6c1892a24d29169bc79e7623bd9", + "versions.yml:md5,dc55a53ff7729dc648ac35db31ec5656", + "versions.yml:md5,faeabb0df6266c84e0fcd7f39d60b759" + ] + ], + "meta": { 
+ "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T17:16:36.709315" + }, + "POST_VARIANTCALLING - varlociraptor somatic": { + "content": [ + [ + [ + { + "id": "tumor_sample", + "patient": "test", + "sample": "tumor", + "sex": "XX", + "status": 1, + "normal_id": "normal_sample", + "contamination": 0.0, + "variantcaller": "strelka", + "postprocess": "varlociraptor", + "chunk": "0" + }, + "tumor_sample.strelka.somatic.varlociraptor.vcf.gz:md5,8714f2326fad338b964dd3ff5628785d" + ] + ], + [ + [ + { + "id": "tumor_sample", + "patient": "test", + "sample": "tumor", + "sex": "XX", + "status": 1, + "normal_id": "normal_sample", + "contamination": 0.0, + "variantcaller": "strelka", + "postprocess": "varlociraptor", + "chunk": "0" + }, + "tumor_sample.strelka.somatic.varlociraptor.vcf.gz.tbi:md5,25d0445f304a4e9db421b880f866cc2c" + ] + ], + [ + "versions.yml:md5,106dcb110a6d3d094298500f4030ef8f", + "versions.yml:md5,1b666f0fbac8556cef352a3a4fa60dbe", + "versions.yml:md5,1bca8763ec1f55aa5987e4f4301ed8cf", + "versions.yml:md5,26d247da3c326a0b09dff1bea9c55c52", + "versions.yml:md5,31e8ad0d1040730aceafc75e5c9fedac", + "versions.yml:md5,3aa2ccd7a5a2d5ecf2afff04a39e0f55", + "versions.yml:md5,641cf79d74888d2838e17449cfb5a98c", + "versions.yml:md5,65c36101acc269f90fcc762f4c515073", + "versions.yml:md5,6b1c3a74fcd23a4a7f78ddf4ef6c6f99", + "versions.yml:md5,ae66ca7ac06bbe30c3534b20ade99b48", + "versions.yml:md5,d28b191a8b7e7f1db9f269fc372d950e", + "versions.yml:md5,e75820d6c821e7da2cd08f5e42c9139e" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T18:17:55.714344" + }, + "POST_VARIANTCALLING - normalization only": { + "content": [ + [ + [ + { + "id": "test_tumor" + }, + "test_tumor.null.norm.vcf.gz:md5,8850056529ea417dd7906110b6008852" + ] + ], + [ + [ + { + "id": "test_tumor" + }, + "test_tumor.null.norm.vcf.gz.tbi:md5,4331f1e4e2ba3cd35241c62e6a3e3a69" + ] + ], + [ + 
"versions.yml:md5,6ea9087108ce40c482f810d350240e2e", + "versions.yml:md5,8e5af507f2006539314960cc13610d2c", + "versions.yml:md5,af9213a1434d504b9b19d0236f508046", + "versions.yml:md5,af9213a1434d504b9b19d0236f508046", + "versions.yml:md5,af9213a1434d504b9b19d0236f508046", + "versions.yml:md5,dee58c88dd03c46f90d7b83578b4ae82", + "versions.yml:md5,dee58c88dd03c46f90d7b83578b4ae82", + "versions.yml:md5,dee58c88dd03c46f90d7b83578b4ae82", + "versions.yml:md5,e79c4d48ba8ecd05bbd54969fa63fd0a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T13:51:32.78009" + }, + "POST_VARIANTCALLING - varlociraptor germline - stub": { + "content": [ + [ + + ], + [ + + ], + [ + "versions.yml:md5,1bca8763ec1f55aa5987e4f4301ed8cf", + "versions.yml:md5,25260e5ecf0e3f964c7e68c7eaa01d52", + "versions.yml:md5,45997f1ffe1a1c5fceda0f00b670faa8", + "versions.yml:md5,5a1892540eb326a685a22f9bc6f6a6b6", + "versions.yml:md5,9185ef5a5fa1849e93823a668c488c5c", + "versions.yml:md5,97327c4f942c0205e15b8cb4e363825b", + "versions.yml:md5,d28b191a8b7e7f1db9f269fc372d950e" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T13:53:27.063259" + }, + "POST_VARIANTCALLING - varlociraptor somatic - stub": { + "content": [ + [ + + ], + [ + + ], + [ + "versions.yml:md5,106dcb110a6d3d094298500f4030ef8f", + "versions.yml:md5,1b666f0fbac8556cef352a3a4fa60dbe", + "versions.yml:md5,1bca8763ec1f55aa5987e4f4301ed8cf", + "versions.yml:md5,26d247da3c326a0b09dff1bea9c55c52", + "versions.yml:md5,31e8ad0d1040730aceafc75e5c9fedac", + "versions.yml:md5,3aa2ccd7a5a2d5ecf2afff04a39e0f55", + "versions.yml:md5,641cf79d74888d2838e17449cfb5a98c", + "versions.yml:md5,65c36101acc269f90fcc762f4c515073", + "versions.yml:md5,6b1c3a74fcd23a4a7f78ddf4ef6c6f99", + "versions.yml:md5,ae66ca7ac06bbe30c3534b20ade99b48", + "versions.yml:md5,d28b191a8b7e7f1db9f269fc372d950e", + "versions.yml:md5,e75820d6c821e7da2cd08f5e42c9139e" + ] + ], + "meta": { + 
"nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T18:19:33.661409" + }, + "POST_VARIANTCALLING - varlociraptor germline": { + "content": [ + [ + [ + { + "id": "test_sample", + "patient": "test", + "sample": "test", + "sex": "XX", + "status": 0, + "variantcaller": "strelka", + "postprocess": "varlociraptor", + "chunk": "0" + }, + "test_sample.strelka.germline.varlociraptor.vcf.gz:md5,4a28a6ca6dc8329c5df79fcd801e7d7c" + ] + ], + [ + [ + { + "id": "test_sample", + "patient": "test", + "sample": "test", + "sex": "XX", + "status": 0, + "variantcaller": "strelka", + "postprocess": "varlociraptor", + "chunk": "0" + }, + "test_sample.strelka.germline.varlociraptor.vcf.gz.tbi:md5,9e063f81e588cb3f7ad883463c230153" + ] + ], + [ + "versions.yml:md5,1bca8763ec1f55aa5987e4f4301ed8cf", + "versions.yml:md5,25260e5ecf0e3f964c7e68c7eaa01d52", + "versions.yml:md5,45997f1ffe1a1c5fceda0f00b670faa8", + "versions.yml:md5,5a1892540eb326a685a22f9bc6f6a6b6", + "versions.yml:md5,9185ef5a5fa1849e93823a668c488c5c", + "versions.yml:md5,97327c4f942c0205e15b8cb4e363825b", + "versions.yml:md5,d28b191a8b7e7f1db9f269fc372d950e" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T17:16:13.83981" + }, + "POST_VARIANTCALLING - normalization only - stub": { + "content": [ + [ + [ + { + "id": "test_sample" + }, + "test_sample.null.norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + [ + { + "id": "test_sample" + }, + "test_sample.null.norm.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + "versions.yml:md5,6ea9087108ce40c482f810d350240e2e", + "versions.yml:md5,8e5af507f2006539314960cc13610d2c", + "versions.yml:md5,af9213a1434d504b9b19d0236f508046", + "versions.yml:md5,dee58c88dd03c46f90d7b83578b4ae82", + "versions.yml:md5,e79c4d48ba8ecd05bbd54969fa63fd0a" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T13:52:26.532241" + }, + "POST_VARIANTCALLING - 
varlociraptor tumor-only - stub": { + "content": [ + [ + + ], + [ + + ], + [ + "versions.yml:md5,2944a0f9af0e3918bb9833c87df1ebb6", + "versions.yml:md5,29f73f6b28ae158785ebea38843af786", + "versions.yml:md5,5d2a1f6080254676bff1ef3b27ab641f", + "versions.yml:md5,c35cd6c1892a24d29169bc79e7623bd9", + "versions.yml:md5,dc55a53ff7729dc648ac35db31ec5656", + "versions.yml:md5,faeabb0df6266c84e0fcd7f39d60b759" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T16:51:00.574162" + } +} \ No newline at end of file diff --git a/subworkflows/local/post_variantcalling/tests/nextflow.config b/subworkflows/local/post_variantcalling/tests/nextflow.config new file mode 100644 index 0000000000..fa3244f159 --- /dev/null +++ b/subworkflows/local/post_variantcalling/tests/nextflow.config @@ -0,0 +1,7 @@ +// Minimal config file for tests +process { + // Fudge the naming to make sure we don't have file name clashes in tests + withName: 'POST_VARIANTCALLING:CONCATENATE_GERMLINE_VCFS:ADD_INFO_TO_VCF' { + ext.prefix = { "${meta.id}.${meta.num}"} + } +} diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf deleted file mode 100644 index 846875fc6d..0000000000 --- a/subworkflows/local/prepare_genome.nf +++ /dev/null @@ -1,179 +0,0 @@ -// -// PREPARE GENOME -// - -// Initialize channels based on params or indices that were just built - -include { BUILD_INTERVALS } from '../../modules/local/build_intervals/main' -include { BWA_INDEX as BWAMEM1_INDEX } from '../../modules/nf-core/modules/bwa/index/main' -include { BWAMEM2_INDEX } from '../../modules/nf-core/modules/bwamem2/index/main' -include { CREATE_INTERVALS_BED } from '../../modules/local/create_intervals_bed/main' -include { GATK4_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/modules/gatk4/createsequencedictionary/main' -include { GATK4_INTERVALLISTTOBED } from '../../modules/local/gatk4/intervallisttobed' -include { MSISENSORPRO_SCAN } from 
'../../modules/nf-core/modules/msisensorpro/scan/main' -include { SAMTOOLS_FAIDX } from '../../modules/nf-core/modules/samtools/faidx/main' -include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_SPLIT } from '../../modules/nf-core/modules/tabix/bgziptabix/main' -include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_ALL } from '../../modules/nf-core/modules/tabix/bgziptabix/main' -include { TABIX_TABIX as TABIX_DBSNP } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_GERMLINE_RESOURCE } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_KNOWN_INDELS } from '../../modules/nf-core/modules/tabix/tabix/main' -include { TABIX_TABIX as TABIX_PON } from '../../modules/nf-core/modules/tabix/tabix/main' - -workflow PREPARE_GENOME { - take: - dbsnp // channel: [optional] dbsnp - fasta // channel: [mandatory] fasta - fasta_fai // channel: [optional] fasta_fai - germline_resource // channel: [optional] germline_resource - known_indels // channel: [optional] known_indels - pon // channel: [optional] pon - tools // value: [mandatory] tools - step // value: [mandatory] step - - main: - - if(!tools) tools = "" - - ch_versions = Channel.empty() - - ch_bwa = Channel.empty() - if (!(params.bwa) && 'mapping' in step) { - if (params.aligner == "bwa-mem") { - BWAMEM1_INDEX(fasta) - ch_bwa = BWAMEM1_INDEX.out.index - ch_versions = ch_versions.mix(BWAMEM1_INDEX.out.versions) - } else if (params.aligner == "bwa-mem2") { - BWAMEM2_INDEX(fasta) - ch_bwa = BWAMEM2_INDEX.out.index - ch_versions = ch_versions.mix(BWAMEM2_INDEX.out.versions) - } - } - - ch_dict = Channel.empty() - if (!(params.dict) && !('annotate' in step) && !('controlfreec' in step)) { - GATK4_CREATESEQUENCEDICTIONARY(fasta) - ch_dict = GATK4_CREATESEQUENCEDICTIONARY.out.dict - ch_versions = ch_versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) - } - - ch_fasta_fai = Channel.empty() - if (fasta_fai) ch_fasta_fai = fasta_fai - if 
(!(params.fasta_fai) && !('annotate' in step)) { - SAMTOOLS_FAIDX(fasta.map{ it -> [[id:it[0].getName()], it]}) - ch_fasta_fai = SAMTOOLS_FAIDX.out.fai.map{ meta, fai -> [fai] } - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) - } - - ch_dbsnp_tbi = Channel.empty() - if (!(params.dbsnp_tbi) && params.dbsnp && ('mapping' in step || 'prepare_recalibration' in step || tools.contains('controlfreec') || tools.contains('haplotypecaller') || tools.contains('mutect2') || tools.contains('tnscope'))) { - TABIX_DBSNP(dbsnp.map{ it -> [[id:it[0].baseName], it] }) - ch_dbsnp_tbi = TABIX_DBSNP.out.tbi.map{ meta, tbi -> [tbi] } - ch_versions = ch_versions.mix(TABIX_DBSNP.out.versions) - } - - ch_germline_resource_tbi = Channel.empty() - if (!(params.germline_resource_tbi) && params.germline_resource && tools.contains('mutect2')) { - TABIX_GERMLINE_RESOURCE(germline_resource.map{ it -> [[id:it[0].baseName], it] }) - ch_germline_resource_tbi = TABIX_GERMLINE_RESOURCE.out.tbi.map{ meta, tbi -> [tbi] } - ch_versions = ch_versions.mix(TABIX_GERMLINE_RESOURCE.out.versions) - } - - ch_known_indels_tbi = Channel.empty() - if (!(params.known_indels_tbi) && params.known_indels && ('mapping' in step || 'prepare_recalibration' in step)) { - TABIX_KNOWN_INDELS(known_indels.map{ it -> [[id:it[0].baseName], it] }) - ch_known_indels_tbi = TABIX_KNOWN_INDELS.out.tbi.map{ meta, tbi -> [tbi] } - ch_versions = ch_versions.mix(TABIX_KNOWN_INDELS.out.versions) - } - - ch_pon_tbi = Channel.empty() - if (!(params.pon_tbi) && params.pon && (tools.contains('tnscope') || tools.contains('mutect2'))) { - TABIX_PON(pon.map{ it -> [[id:it[0].baseName], it] }) - ch_pon_tbi = TABIX_PON.out.tbi.map{ meta, tbi -> [tbi] } - ch_versions = ch_versions.mix(TABIX_PON.out.versions) - } - - ch_msisensorpro_scan = Channel.empty() - if (tools.contains('msisensorpro')) { - MSISENSORPRO_SCAN(fasta.map{it -> [[id:it[0].baseName], it]}) - ch_msisensorpro_scan = MSISENSORPRO_SCAN.out.list.map{ meta, list -> list} - 
ch_versions = ch_versions.mix(MSISENSORPRO_SCAN.out.versions) - } - - ch_intervals = Channel.empty() - ch_intervals_bed_gz_tbi = Channel.empty() - ch_intervals_combined_bed_gz_tbi = Channel.empty() //Create bed.gz and bed.gz.tbi for input/or created interval file. It contains ALL regions. - - tabix_in_combined = Channel.empty() - if (params.no_intervals) { - - file("${params.outdir}/no_intervals.bed").text = "no_intervals\n" - ch_intervals = Channel.fromPath(file("${params.outdir}/no_intervals.bed")) - tabix_in_combined = ch_intervals.map{it -> [[id:it.getName()], it] } - - } else if (!('annotate' in step) && !('controlfreec' in step)) { - if (!params.intervals){ - - BUILD_INTERVALS(ch_fasta_fai) - tabix_in_combined = BUILD_INTERVALS.out.bed.map{it -> [[id:it.getName()], it] } - ch_intervals = CREATE_INTERVALS_BED(BUILD_INTERVALS.out.bed) - - }else{ - - tabix_in_combined = Channel.fromPath(file(params.intervals)).map{it -> [[id:it.baseName], it] } - if(!params.intervals.endsWith(".bed")){ - GATK4_INTERVALLISTTOBED(tabix_in_combined) - tabix_in_combined = GATK4_INTERVALLISTTOBED.out.bed - ch_versions = ch_versions.mix(GATK4_INTERVALLISTTOBED.out.versions) - } - ch_intervals = CREATE_INTERVALS_BED(file(params.intervals)) - - } - } - - if (!('annotate' in step) && !('controlfreec' in step)){ - - TABIX_BGZIPTABIX_INTERVAL_ALL(tabix_in_combined) - ch_intervals_combined_bed_gz_tbi = TABIX_BGZIPTABIX_INTERVAL_ALL.out.gz_tbi.map{ meta, bed, tbi -> [bed, tbi] } - ch_versions = ch_versions.mix(TABIX_BGZIPTABIX_INTERVAL_ALL.out.versions) - - if (!params.no_intervals) { - ch_intervals = ch_intervals.flatten() - .map{ intervalFile -> - def duration = 0.0 - for (line in intervalFile.readLines()) { - final fields = line.split('\t') - if (fields.size() >= 5) duration += fields[4].toFloat() - else { - start = fields[1].toInteger() - end = fields[2].toInteger() - duration += (end - start) / params.nucleotides_per_second - } - } - [duration, intervalFile] - }.toSortedList({ a, b -> 
include { BBMAP_BBSPLIT as BBMAP_INDEX               } from '../../../modules/nf-core/bbmap/bbsplit'
include { BWAMEM2_INDEX                              } from '../../../modules/nf-core/bwamem2/index'
include { BWA_INDEX as BWAMEM1_INDEX                 } from '../../../modules/nf-core/bwa/index'
include { DRAGMAP_HASHTABLE                          } from '../../../modules/nf-core/dragmap/hashtable'
include { GATK4_CREATESEQUENCEDICTIONARY             } from '../../../modules/nf-core/gatk4/createsequencedictionary'
include { MSISENSORPRO_SCAN                          } from '../../../modules/nf-core/msisensorpro/scan'
include { SAMTOOLS_FAIDX                             } from '../../../modules/nf-core/samtools/faidx'
include { TABIX_TABIX as TABIX_BCFTOOLS_ANNOTATIONS  } from '../../../modules/nf-core/tabix/tabix'
include { TABIX_TABIX as TABIX_DBSNP                 } from '../../../modules/nf-core/tabix/tabix'
include { TABIX_TABIX as TABIX_GERMLINE_RESOURCE     } from '../../../modules/nf-core/tabix/tabix'
include { TABIX_TABIX as TABIX_KNOWN_INDELS          } from '../../../modules/nf-core/tabix/tabix'
include { TABIX_TABIX as TABIX_KNOWN_SNPS            } from '../../../modules/nf-core/tabix/tabix'
include { TABIX_TABIX as TABIX_PON                   } from '../../../modules/nf-core/tabix/tabix'
include { UNTAR as UNTAR_BBSPLIT_INDEX               } from '../../../modules/nf-core/untar'
include { UNTAR as UNTAR_CHR_DIR                     } from '../../../modules/nf-core/untar'
include { UNTAR as UNTAR_MSISENSOR2_MODELS           } from '../../../modules/nf-core/untar'
include { UNZIP as UNZIP_ALLELES                     } from '../../../modules/nf-core/unzip'
include { UNZIP as UNZIP_GC                          } from '../../../modules/nf-core/unzip'
include { UNZIP as UNZIP_LOCI                        } from '../../../modules/nf-core/unzip'
include { UNZIP as UNZIP_RT                          } from '../../../modules/nf-core/unzip'

//
// PREPARE_GENOME
//
// Turns the reference-related inputs into channels. For each resource the
// workflow either wraps the supplied path in a channel, or runs the matching
// module (index / dictionary / tabix / untar / unzip) to create it when the
// path is missing and the selected `step` / `tools` call for it.
//
// All `*_in` inputs are used as plain values (tested for truthiness, passed to
// Channel.fromPath / file(), or have `.endsWith()` called on them), not as channels.
//
workflow PREPARE_GENOME {
    take:
    ascat_alleles_in            // params.ascat_alleles
    ascat_loci_in               // params.ascat_loci
    ascat_loci_gc_in            // params.ascat_loci_gc
    ascat_loci_rt_in            // params.ascat_loci_rt
    bbsplit_fasta_list_in       // params.bbsplit_fasta_list
    bbsplit_index_in            // params.bbsplit_index
    bcftools_annotations_in     // params.bcftools_annotations
    bcftools_annotations_tbi_in // params.bcftools_annotations_tbi
    bwa_in                      // params.bwa
    bwamem2_in                  // params.bwamem2
    chr_dir_in                  // params.chr_dir
    dbsnp_in                    // params.dbsnp
    dbsnp_tbi_in                // params.dbsnp_tbi
    dict_in                     // params.dict
    dragmap_in                  // params.dragmap
    fasta_in                    // params.fasta
    fasta_fai_in                // params.fasta_fai
    germline_resource_in        // params.germline_resource
    germline_resource_tbi_in    // params.germline_resource_tbi
    known_indels_in             // params.known_indels
    known_indels_tbi_in         // params.known_indels_tbi
    known_snps_in               // params.known_snps
    known_snps_tbi_in           // params.known_snps_tbi
    msisensor2_models_in        // [optional] path to msisensor2 models (directory or .tar.gz)
    msisensorpro_scan_in        // [optional] path to a pre-computed msisensorpro scan
    pon_in                      // params.pon
    pon_tbi_in                  // params.pon_tbi
    aligner                     // params.aligner
    step                        // params.step
    tools                       // params.tools (comma-separated string, may be null)
    vep_include_fasta           // params.vep_include_fasta

    main:
    versions = Channel.empty()

    // Normalise `tools` once. Calling `tools.split(',')` directly throws a
    // NullPointerException when no tool was requested, so every lookup below
    // goes through this list instead.
    def tools_list = tools ? tools.split(',').toList() : []

    // Steps for which missing dbsnp / known-sites indices are built regardless of `tools`
    def index_steps = ['mapping', 'markduplicates', 'prepare_recalibration']

    // TODO: EXTRACT FASTA FILE?
    fasta = fasta_in ? Channel.fromPath(fasta_in).map { fasta_file -> [[id: fasta_file.baseName], fasta_file] }.collect() : Channel.empty()
    vep_fasta = vep_include_fasta ? fasta : [[id: 'null'], []]

    // Aligner index: only needed when starting from mapping. Defaults to an empty
    // channel so the emit below is always defined, even for an aligner that
    // matches none of the branches.
    index_alignment = Channel.empty()
    if (step == 'mapping') {
        def bwa_index_aligners = ['bwa-mem', 'sentieon-bwamem', 'parabricks']

        if (bwa_index_aligners.contains(aligner)) {
            if (bwa_in) {
                index_alignment = Channel.fromPath(bwa_in).map { index -> [[id: 'bwa'], index] }.collect()
            }
            else {
                BWAMEM1_INDEX(fasta)
                index_alignment = BWAMEM1_INDEX.out.index.collect()
                versions = versions.mix(BWAMEM1_INDEX.out.versions)
            }
        }
        else if (aligner == 'bwa-mem2') {
            if (bwamem2_in) {
                index_alignment = Channel.fromPath(bwamem2_in).map { index -> [[id: 'bwamem2'], index] }.collect()
            }
            else {
                BWAMEM2_INDEX(fasta)
                index_alignment = BWAMEM2_INDEX.out.index.collect()
                versions = versions.mix(BWAMEM2_INDEX.out.versions)
            }
        }
        else if (aligner == 'dragmap') {
            if (dragmap_in) {
                index_alignment = Channel.fromPath(dragmap_in).map { index -> [[id: 'dragmap'], index] }.collect()
            }
            else {
                DRAGMAP_HASHTABLE(fasta)
                index_alignment = DRAGMAP_HASHTABLE.out.hashmap.collect()
                versions = versions.mix(DRAGMAP_HASHTABLE.out.versions)
            }
        }
    }

    // Sequence dictionary: built unless supplied; skipped entirely for the annotate step
    if (!dict_in && step != "annotate") {
        GATK4_CREATESEQUENCEDICTIONARY(fasta)
        dict = GATK4_CREATESEQUENCEDICTIONARY.out.dict.collect()
        versions = versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions)
    }
    else if (dict_in) {
        dict = Channel.fromPath(dict_in).map { it -> [[id: 'dict'], it] }.collect()
    }
    else {
        dict = Channel.empty()
    }

    // FASTA index: same policy as the dictionary
    if (!fasta_fai_in && step != "annotate") {
        SAMTOOLS_FAIDX(fasta, [[id: 'no_fai'], []], false)
        fasta_fai = SAMTOOLS_FAIDX.out.fai.collect()
        versions = versions.mix(SAMTOOLS_FAIDX.out.versions)
    }
    else if (fasta_fai_in) {
        fasta_fai = Channel.fromPath(fasta_fai_in).map { it -> [[id: 'fai'], it] }.collect()
    }
    else {
        fasta_fai = Channel.empty()
    }

    // Prepare genome for BBSplit contamination filtering
    bbsplit_index = Channel.empty()
    if (tools_list.contains('bbsplit')) {
        if (bbsplit_index_in) {
            // Use user-provided bbsplit index
            if (bbsplit_index_in.endsWith('.tar.gz')) {
                bbsplit_index = UNTAR_BBSPLIT_INDEX([[id: 'bbsplit_index'], file(bbsplit_index_in, checkIfExists: true)]).untar.map { _meta, index -> index }
                versions = versions.mix(UNTAR_BBSPLIT_INDEX.out.versions)
            }
            else {
                bbsplit_index = Channel.value(file(bbsplit_index_in, checkIfExists: true))
            }
        }
        else if (bbsplit_fasta_list_in) {
            // Build it from scratch if we have FASTA.
            // Each CSV row is `id,fasta`; the rows are regrouped into one list of ids
            // and one list of fasta files, emitted together as a single item.
            // Channel.of replaces the deprecated Channel.from factory.
            Channel.of(file(bbsplit_fasta_list_in, checkIfExists: true))
                .splitCsv(header: false, sep: ',')
                .flatMap { id, fafile -> [['id', id], ['fasta', file(fafile, checkIfExists: true)]] }
                .groupTuple()
                .map { it -> it[1] }
                .collect { [it] }
                .set { ch_bbsplit_fasta_list }

            bbsplit_index = BBMAP_INDEX(
                [[id: "build_index"], []],
                [],
                fasta.map { _meta, fasta_ -> fasta_ },
                ch_bbsplit_fasta_list,
                true,
            ).index
        }
    }

    // bcftools annotations: index is built whenever the VCF is given without a .tbi
    bcftools_annotations = bcftools_annotations_in ? Channel.fromPath(bcftools_annotations_in).collect() : Channel.value([])
    bcftools_annotations_tbi = bcftools_annotations_tbi_in ? Channel.fromPath(bcftools_annotations_tbi_in).collect() : Channel.value([])

    if (!bcftools_annotations_tbi_in && bcftools_annotations_in) {
        TABIX_BCFTOOLS_ANNOTATIONS(bcftools_annotations.flatten().map { vcf -> [[id: vcf.baseName], vcf] })
        bcftools_annotations_tbi = TABIX_BCFTOOLS_ANNOTATIONS.out.tbi.map { _meta, tbi -> [tbi] }.collect()
        versions = versions.mix(TABIX_BCFTOOLS_ANNOTATIONS.out.versions)
    }

    // dbsnp
    dbsnp = dbsnp_in ? Channel.fromPath(dbsnp_in).collect() : Channel.value([])
    dbsnp_tbi = dbsnp_tbi_in ? Channel.fromPath(dbsnp_tbi_in).collect() : Channel.value([])

    def dbsnp_tools = ['controlfreec', 'haplotypecaller', 'sentieon_haplotyper', 'sentieon_dnascope', 'muse', 'mutect2']
    if (!dbsnp_tbi_in && dbsnp_in && (index_steps.contains(step) || dbsnp_tools.any { tool -> tools_list.contains(tool) })) {
        TABIX_DBSNP(dbsnp.flatten().map { vcf -> [[id: vcf.baseName], vcf] })
        dbsnp_tbi = TABIX_DBSNP.out.tbi.map { _meta, tbi -> [tbi] }.collect()
        versions = versions.mix(TABIX_DBSNP.out.versions)
    }

    // germline resource
    germline_resource = germline_resource_in ? Channel.fromPath(germline_resource_in).collect() : Channel.value([])
    germline_resource_tbi = germline_resource_tbi_in ? Channel.fromPath(germline_resource_tbi_in).collect() : Channel.value([])

    if (!germline_resource_tbi_in && germline_resource_in && (tools_list.contains('mutect2') || tools_list.contains('sentieon_tnscope'))) {
        TABIX_GERMLINE_RESOURCE(germline_resource.flatten().map { vcf -> [[id: vcf.baseName], vcf] })
        germline_resource_tbi = TABIX_GERMLINE_RESOURCE.out.tbi.map { _meta, tbi -> [tbi] }.collect()
        versions = versions.mix(TABIX_GERMLINE_RESOURCE.out.versions)
    }

    // known indels
    known_indels = known_indels_in ? Channel.fromPath(known_indels_in).collect() : Channel.value([])
    known_indels_tbi = known_indels_tbi_in ? Channel.fromPath(known_indels_tbi_in).collect() : Channel.value([])

    def known_indels_tools = ['haplotypecaller', 'sentieon_haplotyper', 'sentieon_dnascope']
    if (!known_indels_tbi_in && known_indels_in && (index_steps.contains(step) || known_indels_tools.any { tool -> tools_list.contains(tool) })) {
        TABIX_KNOWN_INDELS(known_indels.flatten().map { vcf -> [[id: vcf.baseName], vcf] })
        known_indels_tbi = TABIX_KNOWN_INDELS.out.tbi.map { _meta, tbi -> [tbi] }.collect()
        versions = versions.mix(TABIX_KNOWN_INDELS.out.versions)
    }

    // known snps
    known_snps = known_snps_in ? Channel.fromPath(known_snps_in).collect() : Channel.value([])
    known_snps_tbi = known_snps_tbi_in ? Channel.fromPath(known_snps_tbi_in).collect() : Channel.value([])

    // NOTE(review): unlike known_indels, 'sentieon_dnascope' is not in this list - kept as in the original; confirm it is intended
    def known_snps_tools = ['haplotypecaller', 'sentieon_haplotyper']
    if (!known_snps_tbi_in && known_snps_in && (index_steps.contains(step) || known_snps_tools.any { tool -> tools_list.contains(tool) })) {
        TABIX_KNOWN_SNPS(known_snps.flatten().map { vcf -> [[id: vcf.baseName], vcf] })
        known_snps_tbi = TABIX_KNOWN_SNPS.out.tbi.map { _meta, tbi -> [tbi] }.collect()
        versions = versions.mix(TABIX_KNOWN_SNPS.out.versions)
    }

    // panel of normals
    pon = pon_in ? Channel.fromPath(pon_in).collect() : Channel.value([])
    pon_tbi = pon_tbi_in ? Channel.fromPath(pon_tbi_in).collect() : Channel.value([])

    if (!pon_tbi_in && pon_in && tools_list.contains('mutect2')) {
        TABIX_PON(pon.flatten().map { vcf -> [[id: vcf.baseName], vcf] })
        pon_tbi = TABIX_PON.out.tbi.map { _meta, tbi -> [tbi] }.collect()
        versions = versions.mix(TABIX_PON.out.versions)
    }

    // known_sites is made by grouping both the dbsnp and the known snps/indels resources
    // Which can either or both be optional
    known_sites_indels = dbsnp.concat(known_indels).collect()
    known_sites_indels_tbi = dbsnp_tbi.concat(known_indels_tbi).collect()
    known_sites_snps = dbsnp.concat(known_snps).collect()
    known_sites_snps_tbi = dbsnp_tbi.concat(known_snps_tbi).collect()

    // MSI
    if (msisensor2_models_in && msisensor2_models_in.endsWith(".tar.gz") && tools_list.contains('msisensor2')) {
        UNTAR_MSISENSOR2_MODELS(Channel.fromPath(file(msisensor2_models_in)).map { archive -> [[id: archive.baseName], archive] })
        msisensor2_models = UNTAR_MSISENSOR2_MODELS.out.untar.collect()
        versions = versions.mix(UNTAR_MSISENSOR2_MODELS.out.versions)
    }
    else if (msisensor2_models_in && tools_list.contains('msisensor2')) {
        msisensor2_models = Channel.fromPath(msisensor2_models_in).map { model -> [[id: model.baseName], model] }.collect()
    }
    else {
        msisensor2_models = Channel.value([])
    }

    if (msisensorpro_scan_in) {
        // collect() turns this into a value channel, like the two branches below,
        // so it is not exhausted after the first process invocation that reads it
        msisensorpro_scan = Channel.fromPath(msisensorpro_scan_in).collect()
    }
    else if (tools_list.contains('msisensorpro')) {
        MSISENSORPRO_SCAN(fasta)
        msisensorpro_scan = MSISENSORPRO_SCAN.out.list.map { _meta, list -> [list] }.collect()
        versions = versions.mix(MSISENSORPRO_SCAN.out.versions)
    }
    else {
        msisensorpro_scan = Channel.value([])
    }

    // prepare ascat and controlfreec reference files
    // Archives are only extracted when the consuming tool is requested; otherwise
    // the path is passed through unchanged.
    if (!ascat_alleles_in) {
        ascat_alleles = Channel.empty()
    }
    else if (ascat_alleles_in.endsWith(".zip") && tools_list.contains('ascat')) {
        UNZIP_ALLELES(Channel.fromPath(file(ascat_alleles_in)).map { archive -> [[id: archive.baseName], archive] })
        ascat_alleles = UNZIP_ALLELES.out.unzipped_archive.map { _meta, extracted_archive -> extracted_archive }.collect()
        versions = versions.mix(UNZIP_ALLELES.out.versions)
    }
    else {
        ascat_alleles = Channel.fromPath(ascat_alleles_in).collect()
    }

    if (!ascat_loci_in) {
        ascat_loci = Channel.empty()
    }
    else if (ascat_loci_in.endsWith(".zip") && tools_list.contains('ascat')) {
        UNZIP_LOCI(Channel.fromPath(file(ascat_loci_in)).map { archive -> [[id: archive.baseName], archive] })
        ascat_loci = UNZIP_LOCI.out.unzipped_archive.map { _meta, extracted_archive -> extracted_archive }.collect()
        versions = versions.mix(UNZIP_LOCI.out.versions)
    }
    else {
        ascat_loci = Channel.fromPath(ascat_loci_in).collect()
    }

    if (!ascat_loci_gc_in) {
        ascat_loci_gc = Channel.value([])
    }
    else if (ascat_loci_gc_in.endsWith(".zip") && tools_list.contains('ascat')) {
        UNZIP_GC(Channel.fromPath(file(ascat_loci_gc_in)).map { archive -> [[id: archive.baseName], archive] })
        ascat_loci_gc = UNZIP_GC.out.unzipped_archive.map { _meta, extracted_archive -> extracted_archive }.collect()
        versions = versions.mix(UNZIP_GC.out.versions)
    }
    else {
        ascat_loci_gc = Channel.fromPath(ascat_loci_gc_in).collect()
    }

    if (!ascat_loci_rt_in) {
        ascat_loci_rt = Channel.value([])
    }
    else if (ascat_loci_rt_in.endsWith(".zip") && tools_list.contains('ascat')) {
        UNZIP_RT(Channel.fromPath(file(ascat_loci_rt_in)).map { archive -> [[id: archive.baseName], archive] })
        ascat_loci_rt = UNZIP_RT.out.unzipped_archive.map { _meta, extracted_archive -> extracted_archive }.collect()
        versions = versions.mix(UNZIP_RT.out.versions)
    }
    else {
        ascat_loci_rt = Channel.fromPath(ascat_loci_rt_in).collect()
    }

    if (!chr_dir_in) {
        chr_dir = Channel.value([])
    }
    else if (chr_dir_in.endsWith(".tar.gz") && tools_list.contains('controlfreec')) {
        UNTAR_CHR_DIR(Channel.fromPath(file(chr_dir_in)).map { archive -> [[id: archive.baseName], archive] })
        chr_dir = UNTAR_CHR_DIR.out.untar.map { _meta, extracted_archive -> extracted_archive }.collect()
        versions = versions.mix(UNTAR_CHR_DIR.out.versions)
    }
    else {
        chr_dir = Channel.fromPath(chr_dir_in).collect()
    }

    emit:
    ascat_alleles            // Channel: [ascat_alleles]
    ascat_loci               // Channel: [ascat_loci]
    ascat_loci_gc            // Channel: [ascat_loci_gc]
    ascat_loci_rt            // Channel: [ascat_loci_rt]
    bbsplit_index            // Channel: [bbsplit/index/]
    bcftools_annotations     // Channel: [bcftools_annotations]
    bcftools_annotations_tbi // Channel: [bcftools_annotations_tbi]
    chr_dir                  // Channel: [chr_dir/]
    dbsnp                    // Channel: [dbsnp]
    dbsnp_tbi                // Channel: [dbsnp_tbi]
    dict                     // Channel: [meta, dict]
    fasta                    // Channel: [meta, fasta]
    fasta_fai                // Channel: [meta, fasta_fai]
    germline_resource        // Channel: [germline_resource]
    germline_resource_tbi    // Channel: [germline_resource_tbi]
    index_alignment          // Channel: [meta, index_alignment/] either bwa/, bwamem2/ or dragmap/
    known_indels             // Channel: [known_indels]
    known_indels_tbi         // Channel: [known_indels_tbi]
    known_sites_indels       // Channel: [known_sites_indels]
    known_sites_indels_tbi   // Channel: [known_sites_indels_tbi]
    known_sites_snps         // Channel: [known_sites_snps]
    known_sites_snps_tbi     // Channel: [known_sites_snps_tbi]
    known_snps               // Channel: [known_snps]
    known_snps_tbi           // Channel: [known_snps_tbi]
    msisensor2_models        // Channel: [models/]
    msisensorpro_scan        // Channel: [genome_msi.list]
    pon                      // Channel: [pon]
    pon_tbi                  // Channel: [pon_tbi]
    vep_fasta                // Channel: [meta, vep_fasta]
    versions                 // Channel: [versions.yml]
}
// nf-test suite for the BBSplit branch of the PREPARE_GENOME subworkflow.
//
// Every test fills the 31 positional inputs in the order of the workflow's
// `take:` block (the trailing comment on each `input[n]` line names the slot).
// Only three slots vary between tests:
//   input[4]  bbsplit_fasta_list  - CSV used to build an index from scratch
//   input[5]  bbsplit_index       - pre-built index (directory or .tar.gz)
//   input[29] tools               - "bbsplit" enables the branch, "no_tools" does not
//
// NOTE(review): several slots are fed channels (Channel.empty() / Channel.of(...)),
// whereas main.nf tests the matching `*_in` inputs for truthiness and passes them
// to Channel.fromPath - confirm this is the intended way to drive the workflow.
// NOTE(review): each test sets params.dict / params.fasta_fai but passes null for
// input[13] (dict) and input[16] (fasta_fai) - verify whether the params are used.
nextflow_workflow {

    name "Test Subworkflow PREPARE_GENOME - BBSplit functionality"
    script "../main.nf"
    workflow "PREPARE_GENOME"
    config "./nextflow.config"

    tag "subworkflows"
    tag "subworkflows_local"
    tag "prepare_genome"
    tag "bbsplit"

    // No index supplied, fasta list supplied, tools = "bbsplit": main.nf takes the
    // "build from scratch" branch. Snapshots the index contents and versions.
    test("BBSplit - build index from fasta list") {

        when {
            params {
                dict      = params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict'
                fasta_fai = params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai'
            }
            workflow {
                """
                input[0] = null // ascat_alleles
                input[1] = null // ascat_loci
                input[2] = null // ascat_loci_gc
                input[3] = null // ascat_loci_rt
                input[4] = "${projectDir}/tests/csv/bbsplit_fasta_list.csv" // bbsplit_fasta_list
                input[5] = null // bbsplit_index
                input[6] = Channel.empty() // bcftools_annotations
                input[7] = Channel.empty() // bcftools_annotations_tbi
                input[8] = null // bwa
                input[9] = null // bwamem2
                input[10] = null // chr_dir
                input[11] = Channel.empty() // dbsnp
                input[12] = Channel.empty() // dbsnp_tbi
                input[13] = null // dict
                input[14] = null // dragmap
                input[15] = Channel.of([[id: "fasta"], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) // fasta
                input[16] = null // fasta_fai
                input[17] = Channel.empty() // germline_resource
                input[18] = Channel.empty() // germline_resource_tbi
                input[19] = Channel.empty() // known_indels
                input[20] = Channel.empty() // known_indels_tbi
                input[21] = Channel.empty() // known_snps
                input[22] = Channel.empty() // known_snps_tbi
                input[23] = null // msisensor2_models
                input[24] = null // msisensorpro_scan
                input[25] = Channel.empty() // pon
                input[26] = Channel.empty() // pon_tbi
                input[27] = 'bwa-mem' // aligner
                input[28] = 'mapping' // step
                input[29] = "bbsplit" // tools - enable bbsplit
                input[30] = false // vep_include_fasta
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success},
                { assert snapshot(
                    workflow.out.bbsplit_index,
                    workflow.out.versions
                    ).match() }
            )
        }
    }

    // tools = "no_tools": the BBSplit branch is not entered, so the
    // bbsplit_index output is expected to carry no items.
    test("Don't run BBSplit - tools without bbsplit") {

        when {
            params {
                dict      = params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict'
                fasta_fai = params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai'
            }

            workflow {
                """
                input[0] = null // ascat_alleles
                input[1] = null // ascat_loci
                input[2] = null // ascat_loci_gc
                input[3] = null // ascat_loci_rt
                input[4] = null // bbsplit_fasta_list
                input[5] = null // bbsplit_index
                input[6] = Channel.empty() // bcftools_annotations
                input[7] = Channel.empty() // bcftools_annotations_tbi
                input[8] = null // bwa
                input[9] = null // bwamem2
                input[10] = null // chr_dir
                input[11] = Channel.empty() // dbsnp
                input[12] = Channel.empty() // dbsnp_tbi
                input[13] = null // dict
                input[14] = null // dragmap
                input[15] = Channel.of([[id: "fasta"], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) // fasta
                input[16] = null // fasta_fai
                input[17] = Channel.empty() // germline_resource
                input[18] = Channel.empty() // germline_resource_tbi
                input[19] = Channel.empty() // known_indels
                input[20] = Channel.empty() // known_indels_tbi
                input[21] = Channel.empty() // known_snps
                input[22] = Channel.empty() // known_snps_tbi
                input[23] = null // msisensor2_models
                input[24] = null // msisensorpro_scan
                input[25] = Channel.empty() // pon
                input[26] = Channel.empty() // pon_tbi
                input[27] = 'bwa-mem' // aligner
                input[28] = 'mapping' // step
                input[29] = "no_tools" // tools - bbsplit not enabled
                input[30] = false // vep_include_fasta
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                { assert workflow.out.bbsplit_index.size() == 0 } // Should be empty when skipped
            )
        }
    }

    // A directory path (no .tar.gz suffix) is supplied as the index: main.nf wraps
    // it in a value channel without running UNTAR. The directory is created empty
    // under workDir inside the input script.
    test("BBSplit - use provided directory index") {

        when {
            params {
                dict      = params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict'
                fasta_fai = params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai'
            }

            workflow {
                """
                // Create a mock directory for testing
                def mock_index_dir = file("${workDir}/mock_bbsplit_index")
                mock_index_dir.mkdirs()

                input[0] = null // ascat_alleles
                input[1] = null // ascat_loci
                input[2] = null // ascat_loci_gc
                input[3] = null // ascat_loci_rt
                input[4] = null // bbsplit_fasta_list
                input[5] = mock_index_dir.toString() // bbsplit_index (directory)
                input[6] = Channel.empty() // bcftools_annotations
                input[7] = Channel.empty() // bcftools_annotations_tbi
                input[8] = null // bwa
                input[9] = null // bwamem2
                input[10] = null // chr_dir
                input[11] = Channel.empty() // dbsnp
                input[12] = Channel.empty() // dbsnp_tbi
                input[13] = null // dict
                input[14] = null // dragmap
                input[15] = Channel.of([[id: "fasta"], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) // fasta
                input[16] = null // fasta_fai
                input[17] = Channel.empty() // germline_resource
                input[18] = Channel.empty() // germline_resource_tbi
                input[19] = Channel.empty() // known_indels
                input[20] = Channel.empty() // known_indels_tbi
                input[21] = Channel.empty() // known_snps
                input[22] = Channel.empty() // known_snps_tbi
                input[23] = null // msisensor2_models
                input[24] = null // msisensorpro_scan
                input[25] = Channel.empty() // pon
                input[26] = Channel.empty() // pon_tbi
                input[27] = 'bwa-mem' // aligner
                input[28] = 'mapping' // step
                input[29] = "bbsplit" // tools - enable bbsplit
                input[30] = false // vep_include_fasta
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                { assert snapshot(
                    workflow.out.bbsplit_index,
                    workflow.out.versions
                    ).match() }
            )
        }
    }


    // Same inputs as the first test, executed with the `-stub` option.
    test("BBSplit - build index - stub") {

        options "-stub"

        when {
            params {
                dict      = params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict'
                fasta_fai = params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai'
            }

            workflow {
                """
                input[0] = null // ascat_alleles
                input[1] = null // ascat_loci
                input[2] = null // ascat_loci_gc
                input[3] = null // ascat_loci_rt
                input[4] = "${projectDir}/tests/csv/bbsplit_fasta_list.csv" // bbsplit_fasta_list
                input[5] = null // bbsplit_index
                input[6] = Channel.empty() // bcftools_annotations
                input[7] = Channel.empty() // bcftools_annotations_tbi
                input[8] = null // bwa
                input[9] = null // bwamem2
                input[10] = null // chr_dir
                input[11] = Channel.empty() // dbsnp
                input[12] = Channel.empty() // dbsnp_tbi
                input[13] = null // dict
                input[14] = null // dragmap
                input[15] = Channel.of([[id: "fasta"], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) // fasta
                input[16] = null // fasta_fai
                input[17] = Channel.empty() // germline_resource
                input[18] = Channel.empty() // germline_resource_tbi
                input[19] = Channel.empty() // known_indels
                input[20] = Channel.empty() // known_indels_tbi
                input[21] = Channel.empty() // known_snps
                input[22] = Channel.empty() // known_snps_tbi
                input[23] = null // msisensor2_models
                input[24] = null // msisensorpro_scan
                input[25] = Channel.empty() // pon
                input[26] = Channel.empty() // pon_tbi
                input[27] = 'bwa-mem' // aligner
                input[28] = 'mapping' // step
                input[29] = "bbsplit" // tools - enable bbsplit
                input[30] = false // vep_include_fasta
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                { assert snapshot(
                    workflow.out.bbsplit_index,
                    workflow.out.versions
                    ).match() }
            )
        }
    }

    // A path ending in .tar.gz is supplied as the index, which routes main.nf
    // through UNTAR_BBSPLIT_INDEX. The archive is a placeholder text file, so the
    // test only runs in stub mode.
    test("BBSplit - use provided tar.gz index - stub") {
        // Using stub tests because we don't have a good tar archive right now
        options "-stub"

        when {
            params {
                dict      = params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict'
                fasta_fai = params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai'
            }

            workflow {
                """
                // Create a dummy tar.gz file just for checkIfExists validation in stub mode
                def dummy_tar = file("${workDir}/bbsplit_index.tar.gz")
                dummy_tar.text = "dummy file for stub test"

                input[0] = null // ascat_alleles
                input[1] = null // ascat_loci
                input[2] = null // ascat_loci_gc
                input[3] = null // ascat_loci_rt
                input[4] = null // bbsplit_fasta_list
                input[5] = dummy_tar.toString() // bbsplit_index (tar.gz file)
                input[6] = Channel.empty() // bcftools_annotations
                input[7] = Channel.empty() // bcftools_annotations_tbi
                input[8] = null // bwa
                input[9] = null // bwamem2
                input[10] = null // chr_dir
                input[11] = Channel.empty() // dbsnp
                input[12] = Channel.empty() // dbsnp_tbi
                input[13] = null // dict
                input[14] = null // dragmap
                input[15] = Channel.of([[id: "fasta"], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) // fasta
                input[16] = null // fasta_fai
                input[17] = Channel.empty() // germline_resource
                input[18] = Channel.empty() // germline_resource_tbi
                input[19] = Channel.empty() // known_indels
                input[20] = Channel.empty() // known_indels_tbi
                input[21] = Channel.empty() // known_snps
                input[22] = Channel.empty() // known_snps_tbi
                input[23] = null // msisensor2_models
                input[24] = null // msisensorpro_scan
                input[25] = Channel.empty() // pon
                input[26] = Channel.empty() // pon_tbi
                input[27] = 'bwa-mem' // aligner
                input[28] = 'mapping' // step
                input[29] = "bbsplit" // tools - enable bbsplit
                input[30] = false // vep_include_fasta
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                { assert snapshot(
                    workflow.out.bbsplit_index,
                    workflow.out.versions
                    ).match() }
            )
        }
    }
}
@@ -0,0 +1,91 @@ +{ + "BBSplit - build index from fasta list": { + "content": [ + [ + [ + [ + [ + [ + "chr1.chrom.gz:md5,8fec4c63ec642613ad10adf4cc2e6ade", + "info.txt:md5,272a899ffbd2d46d61956693835e1d59", + "merged_ref_9222711925177839698.fa.gz:md5,a87e375668975e41efe20d96f2d9df47", + "namelist.txt:md5,c26fae3e77a7b86aded8dbb20e387f38", + "reflist.txt:md5,5b71f0412da1e2528686945503ad6903", + "scaffolds.txt.gz:md5,fd2b7c233f3b02b9332f1e0dab4afb99", + "summary.txt:md5,6b3846933048c29dc961673ac9cf1a30" + ] + ], + [ + [ + "chr1_index_k13_c13_b1.block:md5,385913c1e84b77dc7bf36288ee1c8706", + "chr1_index_k13_c13_b1.block2.gz:md5,2556b45206835a0ff7078d683b5fd6e2" + ] + ] + ] + ] + ], + [ + "versions.yml:md5,cc5444e21efd35d6322702dbef835fb5", + "versions.yml:md5,de3dcee7a7d0e6a6913520a1c6e37372" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-29T11:10:54.581831331" + }, + "BBSplit - use provided directory index": { + "content": [ + [ + [ + + ] + ], + [ + "versions.yml:md5,de3dcee7a7d0e6a6913520a1c6e37372" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-17T16:09:18.854073" + }, + "BBSplit - use provided tar.gz index - stub": { + "content": [ + [ + [ + + ] + ], + [ + "versions.yml:md5,1f674a661ce65c9163f467cc49a064c1", + "versions.yml:md5,de3dcee7a7d0e6a6913520a1c6e37372" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-17T16:12:52.6461" + }, + "BBSplit - build index - stub": { + "content": [ + [ + [ + + ] + ], + [ + "versions.yml:md5,cc5444e21efd35d6322702dbef835fb5", + "versions.yml:md5,de3dcee7a7d0e6a6913520a1c6e37372" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-17T16:10:43.234163" + } +} \ No newline at end of file diff --git a/subworkflows/local/prepare_genome/tests/nextflow.config b/subworkflows/local/prepare_genome/tests/nextflow.config new file mode 100644 index 
//
// PREPARE INTERVALS
//

// Initialize channels based on params or indices that were just built
// For all modules here:
// A when clause condition is defined in the conf/modules.config to determine if the module should be run

include { CREATE_INTERVALS_BED                                   } from '../../../modules/local/create_intervals_bed'
include { GATK4_INTERVALLISTTOBED                                } from '../../../modules/nf-core/gatk4/intervallisttobed'
include { GAWK as BUILD_INTERVALS                                } from '../../../modules/nf-core/gawk'
include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_SPLIT    } from '../../../modules/nf-core/tabix/bgziptabix'
include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_COMBINED } from '../../../modules/nf-core/tabix/bgziptabix'

// Produces the interval files in two flavours: split into one BED per region
// (each paired with the total number of regions), and combined into one BED.
// Both flavours are also emitted bgzipped and tabix-indexed.
workflow PREPARE_INTERVALS {
    take:
    fasta_fai              // mandatory [ fasta_fai ]
    intervals              // [ params.intervals ]
    no_intervals           // [ params.no_intervals ]
    nucleotides_per_second // divisor turning an interval length into an estimated duration
    outdir                 // directory that receives the placeholder files when no_intervals is set
    step                   // nothing is built for 'annotate' and 'controlfreec'

    main:
    versions = Channel.empty()

    intervals_bed        = Channel.empty() // List of [ bed, num_intervals ], one for each region
    intervals_bed_gz_tbi = Channel.empty() // List of [ bed.gz, bed.gz.tbi, num_intervals ], one for each region
    intervals_combined   = Channel.empty() // Single bed file containing all intervals

    def builds_intervals = step != 'annotate' && step != 'controlfreec'

    if (no_intervals) {
        // Write three placeholder files and emit them with an interval count of 0
        file("${outdir}/no_intervals.bed").text = "no_intervals\n"
        file("${outdir}/no_intervals.bed.gz").text = "no_intervals\n"
        file("${outdir}/no_intervals.bed.gz.tbi").text = "no_intervals\n"

        intervals_bed = Channel.fromPath(file("${outdir}/no_intervals.bed"))
            .map { placeholder -> [ placeholder, 0 ] }
        intervals_bed_gz_tbi = Channel.fromPath(file("${outdir}/no_intervals.bed.{gz,gz.tbi}"))
            .collect()
            .map { placeholders -> [ placeholders, 0 ] }
        intervals_combined = Channel.fromPath(file("${outdir}/no_intervals.bed"))
            .map { placeholder -> [ [ id:placeholder.simpleName ], placeholder ] }
    } else if (builds_intervals) {
        if (intervals) {
            // An interval/target file was provided: use it as the combined file
            intervals_combined = Channel.fromPath(file(intervals))
                .map { interval_file -> [ [ id:interval_file.baseName ], interval_file ] }

            CREATE_INTERVALS_BED(file(intervals), nucleotides_per_second)
            intervals_bed = CREATE_INTERVALS_BED.out.bed
            versions = versions.mix(CREATE_INTERVALS_BED.out.versions)

            // If interval file is not provided as .bed, but e.g. as .interval_list then convert to BED format
            if (intervals.endsWith(".interval_list")) {
                GATK4_INTERVALLISTTOBED(intervals_combined)
                intervals_combined = GATK4_INTERVALLISTTOBED.out.bed
                versions = versions.mix(GATK4_INTERVALLISTTOBED.out.versions)
            }
        } else {
            // No interval/target file provided: generate the intervals from the FASTA index
            BUILD_INTERVALS(fasta_fai, [], [])
            intervals_combined = BUILD_INTERVALS.out.output

            CREATE_INTERVALS_BED(intervals_combined.map { _meta, combined_bed -> combined_bed }, nucleotides_per_second)
            intervals_bed = CREATE_INTERVALS_BED.out.bed

            versions = versions.mix(BUILD_INTERVALS.out.versions)
            versions = versions.mix(CREATE_INTERVALS_BED.out.versions)
        }

        // Now for the intervals.bed the following operations are done:
        // 1. Intervals file is split up into multiple bed files for scatter/gather
        // 2. Each bed file is indexed

        // 1. Estimate a duration for every split bed file: the 5th column when a line
        //    has one, otherwise (end - start) / nucleotides_per_second
        ch_duration_per_bed = intervals_bed
            .flatten()
            .map { bed_file ->
                def duration = 0.0
                bed_file.eachLine { record ->
                    def columns = record.split('\t')
                    if (columns.size() >= 5) {
                        duration += columns[4].toFloat()
                    }
                    else {
                        def start = columns[1].toInteger()
                        def end = columns[2].toInteger()
                        duration += (end - start) / nucleotides_per_second
                    }
                }
                [ duration, bed_file ]
            }

        //    Order the files by descending duration, drop the duration, then attach
        //    the total number of files to each of them
        intervals_bed = ch_duration_per_bed
            .toSortedList({ left, right -> right[0] <=> left[0] })
            .flatten()
            .collate(2)
            .map { _duration, bed_file -> bed_file }
            .collect()
            // Adding number of intervals as elements
            .map { bed_files -> [ bed_files, bed_files.size() ] }
            .transpose()

        // 2. Create bed.gz and bed.gz.tbi for each interval file. They are split by region (see above)
        TABIX_BGZIPTABIX_INTERVAL_SPLIT(intervals_bed.map { bed_file, _num_intervals -> [ [ id:bed_file.baseName ], bed_file ] })

        intervals_bed_gz_tbi = TABIX_BGZIPTABIX_INTERVAL_SPLIT.out.gz_index
            .map { _meta, bed_gz, bed_tbi -> [ bed_gz, bed_tbi ] }
            .toList()
            // Adding number of intervals as elements
            .map { indexed_beds -> [ indexed_beds, indexed_beds.size() ] }
            .transpose()
    }

    // The combined file is compressed and indexed in every case
    TABIX_BGZIPTABIX_INTERVAL_COMBINED(intervals_combined)

    intervals_bed_combined = intervals_combined
        .map { _meta, combined_bed -> combined_bed }
        .collect()
    intervals_bed_gz_tbi_combined = TABIX_BGZIPTABIX_INTERVAL_COMBINED.out.gz_index
        .map { _meta, combined_gz, combined_tbi -> [ combined_gz, combined_tbi ] }
        .collect()

    emit:
    // Intervals split for parallel execution
    intervals_bed                 // [ intervals.bed, num_intervals ]
    intervals_bed_gz_tbi          // [ intervals.bed.gz, intervals.bed.gz.tbi, num_intervals ]
    // All intervals in one file
    intervals_bed_combined        // [ intervals.bed ]
    intervals_bed_gz_tbi_combined // [ intervals.bed.gz, intervals.bed.gz.tbi]

    versions                      // [ versions.yml ]
}
/dev/null @@ -1,20 +0,0 @@ -// -// PREPARE_RECALIBRATION_CSV -// - -workflow PREPARE_RECALIBRATION_CSV { - take: - table_bqsr // channel: [mandatory] meta, table - - main: - // Creating csv files to restart from this step - table_bqsr.collectFile(storeDir: "${params.outdir}/preprocessing/csv") { meta, table -> - patient = meta.patient - sample = meta.sample - gender = meta.gender - status = meta.status - cram = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.cram" - crai = "${params.outdir}/preprocessing/${sample}/markduplicates/${sample}.md.cram.crai" - ["markduplicates_no_table_${sample}.csv", "patient,gender,status,sample,cram,crai\n${patient},${gender},${status},${sample},${cram},${crai}\n"] - }.collectFile(name: 'markduplicates_no_table.csv', keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/preprocessing/csv") -} diff --git a/subworkflows/local/prepare_reference_cnvkit/main.nf b/subworkflows/local/prepare_reference_cnvkit/main.nf new file mode 100644 index 0000000000..13bbef5f6b --- /dev/null +++ b/subworkflows/local/prepare_reference_cnvkit/main.nf @@ -0,0 +1,22 @@ +include { CNVKIT_ANTITARGET } from '../../../modules/nf-core/cnvkit/antitarget' +include { CNVKIT_REFERENCE } from '../../../modules/nf-core/cnvkit/reference' + +workflow PREPARE_REFERENCE_CNVKIT { + take: + fasta // channel: [mandatory] meta, fasta + intervals_bed_combined // channel: [] + + main: + versions = Channel.empty() + + // Build the antitarget BED from the combined intervals, then the CNVkit reference (cnn) used for tumor_only mode of cnvkit + CNVKIT_ANTITARGET(intervals_bed_combined.flatten().map { it -> [[id: 'intervals'], it] }) + CNVKIT_REFERENCE(fasta.map { _meta, fasta_ -> [fasta_] }, intervals_bed_combined, CNVKIT_ANTITARGET.out.bed.map { _meta, bed -> [bed] }) + + versions = versions.mix(CNVKIT_ANTITARGET.out.versions) + versions = versions.mix(CNVKIT_REFERENCE.out.versions) + + emit: + cnvkit_reference = CNVKIT_REFERENCE.out.cnn.collect() + versions +} diff --git
a/subworkflows/local/prepare_snpsift_databases/main.nf b/subworkflows/local/prepare_snpsift_databases/main.nf new file mode 100644 index 0000000000..581a62d020 --- /dev/null +++ b/subworkflows/local/prepare_snpsift_databases/main.nf @@ -0,0 +1,52 @@ +// +// Prepare SnpSift annotation databases +// + +include { SNPSIFT_ANNMEMCREATE } from '../../../modules/nf-core/snpsift/annmemcreate' + +workflow PREPARE_SNPSIFT_DATABASES { + take: + val_db_configs // List of maps: [[vcf: file, tbi: file, fields: '', prefix: '', vardb: null], ...] + + main: + ch_configs = channel.fromList(val_db_configs) + + // Branch: create vardb if not provided + ch_configs.branch { + has_vardb: it.vardb != null + needs_vardb: true + }.set { ch_branched } + + // Create vardbs for databases that need them + // Convert semicolon-separated fields to comma-separated (SnpSift expects commas) + SNPSIFT_ANNMEMCREATE( + ch_branched.needs_vardb.map { [[id: it.vcf.baseName], it.vcf, it.tbi, it.fields ? it.fields.replace(';', ',') : ''] } + ) + + // Join created vardbs back with their configs + ch_created = SNPSIFT_ANNMEMCREATE.out.database + .map { meta, vardb -> [meta.id, vardb] } + .join(ch_branched.needs_vardb.map { [it.vcf.baseName, it] }) + .map { _id, vardb, config -> [config.vcf, config.tbi, vardb, config.fields ? config.fields.replace(';', ',') : '', config.prefix ?: ''] } + + // Configs with pre-built vardb + ch_prebuilt = ch_branched.has_vardb + .map { [it.vcf, it.tbi, it.vardb, it.fields ? 
it.fields.replace(';', ',') : '', it.prefix ?: ''] } + + // Collect all into output tuple + ch_db_tuple = ch_prebuilt + .mix(ch_created) + .toList() + .map { list -> + [ + list.collect { it[0] }, // db_vcf + list.collect { it[1] }, // db_vcf_tbi + list.collect { it[2] }, // db_vardb + list.collect { it[3] }, // db_fields + list.collect { it[4] } // db_prefixes + ] + } + + emit: + db_tuple = ch_db_tuple +} diff --git a/subworkflows/local/prepare_snpsift_databases/meta.yml b/subworkflows/local/prepare_snpsift_databases/meta.yml new file mode 100644 index 0000000000..4c062d070d --- /dev/null +++ b/subworkflows/local/prepare_snpsift_databases/meta.yml @@ -0,0 +1,31 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/meta-schema.json +name: "prepare_snpsift_databases" +description: Prepare SnpSift annotation databases, creating vardb directories where needed +keywords: + - snpsift + - annotation + - database + - vcf +components: + - snpsift/annmemcreate +input: + - val_db_configs: + type: list + description: | + List of maps containing database configuration. 
+ Each map should contain: + - vcf: VCF database file + - tbi: Tabix index file + - fields: Comma-separated INFO fields to annotate (optional) + - prefix: Prefix for annotated field names (optional) + - vardb: Pre-built vardb directory (optional, will be created if null) +output: + - db_tuple: + type: tuple + description: | + Tuple containing lists for SNPSIFT_ANNMEM input: + [[databases], [tbis], [vardbs], [fields], [prefixes]] +authors: + - "@friederike-hanssen" +maintainers: + - "@friederike-hanssen" diff --git a/subworkflows/local/recalibrate_csv.nf b/subworkflows/local/recalibrate_csv.nf deleted file mode 100644 index 13b0aeca55..0000000000 --- a/subworkflows/local/recalibrate_csv.nf +++ /dev/null @@ -1,20 +0,0 @@ -// -// RECALIBRATE_CSV -// - -workflow RECALIBRATE_CSV { - take: - cram_recalibrated_index // channel: [mandatory] meta, cram, crai - - main: - // Creating csv files to restart from this step - cram_recalibrated_index.collectFile(storeDir: "${params.outdir}/preprocessing/csv") { meta, cram, crai -> - patient = meta.patient - sample = meta.sample - gender = meta.gender - status = meta.status - cram = "${params.outdir}/preprocessing/${sample}/recalibrated/${sample}.recal.cram" - crai = "${params.outdir}/preprocessing/${sample}/recalibrated/${sample}.recal.cram.crai" - ["recalibrated_${sample}.csv", "patient,gender,status,sample,cram,crai\n${patient},${gender},${status},${sample},${cram},${crai}\n"] - }.collectFile(name: 'recalibrated.csv', keepHeader: true, skip: 1, sort: true, storeDir: "${params.outdir}/preprocessing/csv") -} diff --git a/subworkflows/local/samplesheet_to_channel/main.nf b/subworkflows/local/samplesheet_to_channel/main.nf new file mode 100644 index 0000000000..6bbde0f514 --- /dev/null +++ b/subworkflows/local/samplesheet_to_channel/main.nf @@ -0,0 +1,464 @@ +include { readStructure } from 'plugin/nf-fgbio' + +workflow SAMPLESHEET_TO_CHANNEL { + take: + ch_from_samplesheet // samplesheet + aligner // String: aligner + ascat_alleles 
// Path: ascat alleles + ascat_loci // Path: ascat loci + ascat_loci_gc // Path: ascat loci gc + ascat_loci_rt // Path: ascat loci rt + bcftools_annotations // Path: bcftools annotations + bcftools_annotations_tbi // Path: bcftools annotations tbi + bcftools_columns // Path: bcftools columns + bcftools_header_lines // Path: bcftools header lines + build_only_index // Boolean: build only index + dbsnp // Path: dbsnp + fasta // Path: fasta + germline_resource // Path: germline resource + intervals // Path: intervals + joint_germline // Boolean: joint_germline + joint_mutect2 // Boolean: joint_mutect2 + known_indels // Path: known indels + known_snps // Path: known snps + no_intervals // Boolean: no intervals + pon // Path: pon + sentieon_dnascope_emit_mode // String: sentieon dnascope emit mode + sentieon_haplotyper_emit_mode // String: sentieon haplotyper emit mode + seq_center // String: seq center + seq_platform // String: seq platform + skip_tools // Array: skip tools + snpeff_cache // Path: snpeff cache + snpeff_db // String: snpeff db + step // String: step + tools // Array: tools + umi_length // Integer: umi length for fastp extraction + umi_location // String: umi location for fastp extraction + umi_in_read_header // Boolean: umi in read header + umi_read_structure // String: umi read structure for fgbio consensus + wes // wes + + main: + ch_from_samplesheet.dump(tag: "ch_from_samplesheet") + + ch_from_samplesheet + .map { meta, _fastq_1, _fastq_2, _spring_1, _spring_2, _table, _cram, _crai, _bam, _bai, _contamination, _vcf, _variantcaller -> + // Get only the patient, sample and status fields from the meta map + [meta.patient, meta.subMap('sample', 'status')] + } + .unique() + .groupTuple() + .map { patient, samples -> + // Count samples with status 0 and status 1 + def status0_count = samples.count { it.status == 0 } + def status1_count = samples.count { it.status == 1 } + + // Check the condition and exit with an error if met + if (status1_count == 1 && 
status0_count > 1) { + System.err.println("Patient [${patient}] has more than one sample [${status0_count}] with normal status [0] and one sample with tumor status [1].") + error("Execution halted due to sample status inconsistency.") + } + } + + ch_from_samplesheet + .map { meta, _fastq_1, _fastq_2, _spring_1, _spring_2, _table, _cram, _crai, _bam, _bai, _contamination, _vcf, _variantcaller -> + // Create a unique key for patient-sample-status-lane combination + def combination_key = "${meta.patient}-${meta.sample}-${meta.status}-${meta.lane}" + [combination_key, [meta.patient, meta.sample, meta.status, meta.lane]] + } + .groupTuple() + .map { combination_key, combination_list -> + if (combination_list.size() > 1) { + def patient = combination_list[0][0] + def sample = combination_list[0][1] + def status = combination_list[0][2] + def lane = combination_list[0][3] + System.err.println("Duplicate patient-sample-status-lane combination found: Patient '${patient}', Sample '${sample}', Status '${status}', Lane '${lane}' appears ${combination_list.size()} times. 
Please ensure each combination is unique.") + error("Execution halted due to duplicate patient-sample-status-lane combination.") + } + } + + ch_from_samplesheet + .map { meta, _fastq_1, _fastq_2, _spring_1, _spring_2, _table, _cram, _crai, _bam, _bai, _contamination, _vcf, _variantcaller -> + // Get only the patient, sample and status fields from the meta map + [meta.patient, meta.subMap('sample', 'status')] + } + .unique() + .groupTuple() + .map { patient, samples -> + // Return the patient and the list of sample ids + [patient, samples.collect { it.sample }] + } + // Flatten to [sample_id, patient] pairs + .flatMap { patient, sample_ids -> sample_ids.collect { sample_id -> [sample_id, patient] } } + // Group by sample_id to collect all patient ids per sample + .groupTuple() + .map { sample_id, patient_ids -> + def unique_patients = patient_ids.unique() + if (unique_patients.size() > 1) { + System.err.println("Sample ID '${sample_id}' is associated with multiple patient IDs: ${unique_patients.join(', ')}. 
Please ensure each sample ID is unique to a single patient.") + error("Execution halted due to sample status inconsistency.") + } + } + + // Process the input channel to group lanes by patient and sample + // Generate patient_sample key to group lanes together + // Save the channel ch_with_patient_sample for later use + // Group by patient_sample to get all lanes + // Count number of lanes per sample + // Combine with channel ch_with_patient_sample to add numLanes information + input_sample = ch_from_samplesheet + .map { meta, fastq_1, fastq_2, spring_1, spring_2, table, cram, crai, bam, bai, contamination, vcf, variantcaller -> + [meta.patient + meta.sample, [meta, fastq_1, fastq_2, spring_1, spring_2, table, cram, crai, bam, bai, contamination, vcf, variantcaller]] + } + .tap { ch_with_patient_sample } + .groupTuple() + .map { patient_sample, ch_items -> + [patient_sample, ch_items.size()] + } + .combine(ch_with_patient_sample, by: 0) + .map { _patient_sample, num_lanes, ch_items -> + def (meta, fastq_1, fastq_2, spring_1, spring_2, table, cram, crai, bam, bai, contamination, vcf, variantcaller) = ch_items + + if (contamination) { + meta = meta + [ contamination: contamination] + } + if ((meta.lane || meta.lane == 0) && fastq_2) { + // mapping from fastq files + meta = meta + [id: "${meta.sample}-${meta.lane}".toString(), data_type: "fastq_gz", num_lanes: num_lanes.toInteger(), size: 1] + + if (step == 'mapping') { + return [meta, [fastq_1, fastq_2]] + } + else { + error("Samplesheet contains fastq files but step is `${step}`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + } + else if ((meta.lane || meta.lane == 0) && spring_1 && spring_2) { + // mapping from TWO spring-files - one with R1 and one with R2 + meta = meta + [id: "${meta.sample}-${meta.lane}".toString(), data_type: "two_fastq_gz_spring", num_lanes: num_lanes.toInteger(), size: 1] + + if (step == 'mapping') { + return [meta, [spring_1, spring_2]] + } + else { + error("Samplesheet contains spring files (in columns `spring_1` and `spring_2`) but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + } + else if ((meta.lane || meta.lane == 0) && spring_1 && !spring_2) { + // mapping from ONE spring-file containing both R1 and R2 + meta = meta + [id: "${meta.sample}-${meta.lane}".toString(), data_type: "one_fastq_gz_spring", num_lanes: num_lanes.toInteger(), size: 1] + + if (step == 'mapping') { + return [meta, [spring_1]] + } + else { + error("Samplesheet contains a spring file (in columns `spring_1`) but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + } + else if ((meta.lane || meta.lane == 0) && bam) { + // Any step from BAM + if (step != 'mapping' && !bai) { + error("BAM index (bai) should be provided.") + } + // Only use lane-based ID for mapping step where lanes are processed separately; + // for all other steps (variant_calling, recalibrate, etc.) use sample-based ID + // to ensure consistent ID matching in downstream channel joins + meta = meta + [id: step == 'mapping' ? "${meta.sample}-${meta.lane}".toString() : meta.sample.toString()] + def CN = seq_center ? 
"CN:${seq_center}\\t" : '' + def read_group = "\"@RG\\tID:${meta.sample}_${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${fasta}\\tPL:${seq_platform}\"" + + meta = meta - meta.subMap('lane') + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'bam', size: 1] + + if (step != 'annotate') { + return [meta - meta.subMap('lane'), bam, bai] + } + else { + error("Samplesheet contains bam files but step is `annotate`. The pipeline is expecting vcf files for the annotation. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + } + else if (table && cram) { + // recalibration from CRAM + meta = meta + [id: meta.sample, data_type: 'cram'] + + if (!(step == 'mapping' || step == 'annotate')) { + return [meta - meta.subMap('lane'), cram, crai, table] + } + else { + error("Samplesheet contains cram files but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + } + else if (table && bam) { + // recalibration when skipping MarkDuplicates + meta = meta + [id: meta.sample, data_type: 'bam'] + + if (!(step == 'mapping' || step == 'annotate')) { + return [meta - meta.subMap('lane'), bam, bai, table] + } + else { + error("Samplesheet contains bam files but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + } + else if (cram) { + // prepare_recalibration or variantcalling from CRAM + meta = meta + [id: meta.sample, data_type: 'cram'] + + if (!(step == 'mapping' || step == 'annotate')) { + return [meta - meta.subMap('lane'), cram, crai] + } + else { + error("Samplesheet contains cram files but step is `${step}`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + } + else if (bam) { + // prepare_recalibration when skipping MarkDuplicates or markduplicates + meta = meta + [id: meta.sample, data_type: 'bam'] + + if (!(step == 'mapping' || step == 'annotate')) { + return [meta - meta.subMap('lane'), bam, bai] + } + else { + error("Samplesheet contains bam files but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + } + else if (vcf) { + // annotation + meta = meta + [id: meta.sample, data_type: 'vcf', variantcaller: variantcaller ?: ''] + + if (step == 'annotate') { + return [meta - meta.subMap('lane'), vcf] + } + else { + error("Samplesheet contains vcf files but step is `${step}`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + } + else { + error("Missing or unknown field in csv file header. Please check your samplesheet") + } + } + + if (step != 'annotate' && tools && !build_only_index) { + // Two checks for ensuring that the pipeline stops with a meaningful error message if + // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and + // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal-samples. 
+ input_sample + .filter { it[0].status == 1 } + .ifEmpty { + // In this case, the sample-sheet contains no tumor-samples + if (!build_only_index) { + def tools_tumor = ['ascat', 'controlfreec', 'mutect2', 'msisensorpro'] + def tools_tumor_asked = [] + tools_tumor.each { tool -> + if (tools && tools.split(',').contains(tool)) { + tools_tumor_asked.add(tool) + } + } + if (!tools_tumor_asked.isEmpty()) { + error('The sample-sheet only contains normal-samples, but the following tools, which were requested with "--tools", expect at least one tumor-sample : ' + tools_tumor_asked.join(", ")) + } + } + } + + input_sample + .filter { it[0].status == 0 } + .ifEmpty { + // In this case, the sample-sheet contains no normal/germline-samples + def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller', 'msisensorpro'] + def requested_tools_requiring_normal_samples = [] + tools_requiring_normal_samples.each { tool_requiring_normal_samples -> + if (tools && tools.split(',').contains(tool_requiring_normal_samples)) { + requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples) + } + } + if (!requested_tools_requiring_normal_samples.isEmpty()) { + error('The sample-sheet only contains tumor-samples, but the following tools, which were requested by the option "tools", expect at least one normal-sample : ' + requested_tools_requiring_normal_samples.join(", ")) + } + } + } + + // Fails when wrong extension for intervals file (BED only with `--wes`); warns when intervals are given without `--wes`. Note: `!step == 'annotate'` would parse as `(!step) == 'annotate'` and never match + if (wes && step != 'annotate') { + if (intervals && !intervals.endsWith("bed")) { + error("Target file specified with `--intervals` must be in BED format for targeted data") + } + } + else if (intervals && !intervals.endsWith("bed") && !intervals.endsWith("list")) { + error("Intervals file must end with .bed, .list, or .interval_list") + } + else if (intervals && !wes && step != 'annotate') { + log.warn("Intervals file was provided without parameter `--wes`: Pipeline will assume this is Whole-Genome-Sequencing data.") + } + + if (step == 'mapping' &&
aligner.contains("dragmap") && !(skip_tools && skip_tools.split(',').contains("baserecalibrator"))) { + log.warn("DragMap was specified as aligner. Base recalibration is not contained in --skip_tools. It is recommended to skip baserecalibration when using DragMap\nhttps://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode") + } + + if (step == 'mapping' && (aligner.contains("parabricks") || aligner.contains("sentieon-bwamem")) && umi_read_structure) { + error("${aligner} is currently not compatible with FGBio UMI handling. Please choose a different aligner.") + } + + if (step == 'mapping' && aligner.contains("parabricks" ) && umi_in_read_header) { + error("${aligner} is currently not compatible with extracting UMIs from read headers. Please choose a different aligner.") + } + + if (step == 'mapping' && aligner.contains("parabricks") && umi_location) { + error("${aligner} is currently not compatible with UMI extraction from reads through fastp. Please choose a different aligner.") + } + + if (step == 'mapping' && umi_read_structure && umi_location) { + error("UMI extraction from reads through fastp (umi_location) and fgbio consensus read generation (umi_read_structure) cannot be used together. Please choose one of the two options.") + } + + if (step == 'mapping' && umi_read_structure && umi_in_read_header) { + // If UMIs are in read header, then we cannot use umi_read_structure separately, and instead the UMIs will be taken directly from the header + // This requires us to set umi_read_structure to "+T +T" to indicate that UMIs are in the read header + if( umi_read_structure != "+T +T" ) { + error("UMI extraction from read headers (`umi_in_read_header`) will override `umi_read_structure` when using fgbio consensus generation. 
Please set `umi_read_structure` to '+T +T'.") + } + } + + if (step == 'mapping' && umi_in_read_header && umi_location) { + error("UMI extraction from read headers (umi_in_read_header) and UMI extraction (umi_location) from reads through fastp cannot be used together. Please choose one of the two options.") + } + + if (step == 'mapping' && umi_location && !umi_length) { + error("UMI extraction (umi_location) from reads through fastp requires a UMI length to be specified.") + } + + if (tools && tools.split(',').contains("sentieon_haplotyper") && joint_germline && (!sentieon_haplotyper_emit_mode || !sentieon_haplotyper_emit_mode.contains('gvcf'))) { + error("When setting the option `--joint_germline` and including `sentieon_haplotyper` among the requested tools, please set `--sentieon_haplotyper_emit_mode` to include `gvcf`.") + } + + // MarkDuplicatesSpark can't do UMI based deduplication + if (tools && tools.split(',').contains('markduplicates_spark') && (umi_in_read_header || umi_location )) { + error("UMI based deduplication is not supported by MarkDuplicatesSpark. Please choose a different tool for deduplication.") + } + + // Check the UMI read structure is correct using fgbio plugin + // Copied from fastquorum + if( umi_read_structure) { + umi_read_structure.tokenize(" ").each { rs-> + // If parsing the read structure fails, then a java.lang.reflect.InvocationTargetException will be thrown, with + // the cause containing the exception produced by fgbio. 
+ try { + readStructure(rs) + } catch (java.lang.reflect.InvocationTargetException ex) { + def message = """ + |Please check the input UMI Read structure`${umi_read_structure}` invalid + | + | ${ex.getCause().getMessage()} + | + | For more information on read structures, visit: https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures + | + | Validate your read structures here: https://fulcrumgenomics.github.io/fgbio/validate-read-structure.html + |""".stripMargin() + error(message) + throw ex + } + } + } + + // Fails or warns when missing files or params for ascat + if (tools && tools.split(',').contains('ascat')) { + if (!ascat_alleles) { + error("No allele files were provided for running ASCAT. Please provide a zip folder with allele files.") + } + if (!ascat_loci) { + error("No loci files were provided for running ASCAT. Please provide a zip folder with loci files.") + } + if (!ascat_loci_gc && !ascat_loci_rt) { + log.warn("No LogRCorrection performed in ASCAT. For LogRCorrection to run, please provide either loci gc files or both loci gc files and loci rt files.") + } + if (wes) { + log.warn("Default reference files not suited for running ASCAT on WES data. 
It's recommended to use the reference files provided here: https://github.com/Wedge-lab/battenberg#required-reference-files") + } + } + + // Warns when missing files or params for mutect2 + if (tools && tools.split(',').contains('mutect2')) { + if (!pon) { + log.warn("No Panel-of-normal was specified for Mutect2.\nIt is highly recommended to use one: https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2\nFor more information on how to create one: https://gatk.broadinstitute.org/hc/en-us/articles/5358921041947-CreateSomaticPanelOfNormals-BETA-") + } + if (!germline_resource) { + log.warn("If Mutect2 is specified without a germline resource, no filtering will be done.\nIt is recommended to use one: https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2") + } + if (pon && pon.contains("/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz")) { + log.warn("The default Panel-of-Normals provided by GATK is used for Mutect2.\nIt is highly recommended to generate one from normal samples that are technical similar to the tumor ones.\nFor more information: https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-") + } + } + + // Fails when missing resources for baserecalibrator + // Warns when missing resources for haplotypecaller + if (!dbsnp && !known_indels) { + if (step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate'] && (!skip_tools || (skip_tools && !skip_tools.split(',').contains('baserecalibrator')))) { + error("Base quality score recalibration requires at least one resource file. 
Please provide at least one of `--dbsnp` or `--known_indels`\nYou can skip this step in the workflow by adding `--skip_tools baserecalibrator` to the command.") + } + if (tools && (tools.split(',').contains('haplotypecaller') || tools.split(',').contains('sentieon_haplotyper') || tools.split(',').contains('sentieon_dnascope'))) { + log.warn("If GATK's Haplotypecaller, Sentieon's Dnascope or Sentieon's Haplotyper is specified, without `--dbsnp` or `--known_indels no filtering will be done. For filtering, please provide at least one of `--dbsnp` or `--known_indels`.\nFor more information see FilterVariantTranches (single-sample, default): https://gatk.broadinstitute.org/hc/en-us/articles/5358928898971-FilterVariantTranches\nFor more information see VariantRecalibration (--joint_germline): https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator\nFor more information on GATK Best practice germline variant calling: https://gatk.broadinstitute.org/hc/en-us/articles/360035535932-Germline-short-variant-discovery-SNPs-Indels-") + } + } + if (joint_germline && (!tools || !(tools.split(',').contains('haplotypecaller') || tools.split(',').contains('sentieon_haplotyper') || tools.split(',').contains('sentieon_dnascope')))) { + error("The GATK's Haplotypecaller, Sentieon's Dnascope or Sentieon's Haplotyper should be specified as one of the tools when doing joint germline variant calling.) ") + } + + if (tools && tools.split(',').contains('haplotypecaller') && joint_germline && no_intervals) { + System.err.println("Joint germline variant calling with GATK's HaplotypeCaller requires intervals because GenomicsDB cannot be used without them. 
Please provide intervals or remove `--no_intervals`.") + error("Execution halted due to missing intervals.") + } + + if (tools && (tools.split(',').contains('haplotypecaller') || tools.split(',').contains('sentieon_haplotyper') || tools.split(',').contains('sentieon_dnascope')) && joint_germline && (!dbsnp || !known_indels || !known_snps)) { + log.warn( + """If GATK's Haplotypecaller, Sentieon's Dnascope and/or Sentieon's Haplotyper is specified, but without `--dbsnp`, `--known_snps`, `--known_indels` or the associated resource labels (ie `known_snps_vqsr`), no variant recalibration will be done. For recalibration you must provide all of these resources.\nFor more information see VariantRecalibration: https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator""" + ) + } + + if (tools && tools.split(',').contains('sentieon_dnascope') && joint_germline && (!sentieon_dnascope_emit_mode || !sentieon_dnascope_emit_mode.split(',').contains('gvcf'))) { + error("When using Sentieon Dnascope for joint-germline variant-calling the option `--sentieon_dnascope_emit_mode` has to include `gvcf`.") + } + + if (tools && tools.split(',').contains('sentieon_haplotyper') && joint_germline && (!sentieon_haplotyper_emit_mode || !sentieon_haplotyper_emit_mode.split(',').contains('gvcf'))) { + error("When using Sentieon Haplotyper for joint-germline variant-calling the option `--sentieon_haplotyper_emit_mode` has to include `gvcf`.") + } + + + // Fails when --joint_mutect2 is used without enabling mutect2 + if (joint_mutect2 && (!tools || !tools.split(',').contains('mutect2'))) { + error("The mutect2 should be specified as one of the tools when doing joint somatic variant calling with Mutect2. 
(The mutect2 could be specified by adding `--tools mutect2` to the nextflow command.)") + } + + // Fails when missing tools for variant_calling or annotate + if ((step == 'variant_calling' || step == 'annotate') && !tools) { + error("Please specify at least one tool when using `--step ${step}`.\nhttps://nf-co.re/sarek/parameters#tools") + } + + // Fails when missing sex information for CNV tools or varlociraptor + if (tools && (tools.split(',').contains('ascat') || tools.split(',').contains('controlfreec') || tools.split(',').contains('varlociraptor'))) { + input_sample.map { + if (it[0].sex == 'NA') { + error("Please specify sex information for each sample in your samplesheet when using '--tools' with 'ascat' or 'controlfreec' or 'varlociraptor'.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + } + } + + // Fails when varlociraptor is enable for tumor samples but no contamination is provided + if (tools && tools.split(',').contains('varlociraptor')) { + input_sample.map { + if (it[0].status == 1 && !it[0].containsKey('contamination')) { + error("Please specify contamination information for each tumor sample in your samplesheet when using '--tools' with 'varlociraptor'.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + } + } + + // Fails when bcftools annotate is used but no files are supplied + if (tools && tools.split(',').contains('bcfann') && !(bcftools_annotations && bcftools_annotations_tbi && bcftools_header_lines)) { + error("Please specify --bcftools_annotations, --bcftools_annotations_tbi, and --bcftools_header_lines, when using BCFTools annotations") + } + + // Fails when snpeff annotation is enabled but snpeff_db is not specified + if ((snpeff_cache && tools && (tools.split(',').contains("snpeff") || tools.split(',').contains('merge'))) && !snpeff_db) { + error("Please specify --snpeff_db") + } + + emit: + input_sample +} diff --git a/subworkflows/local/samplesheet_to_channel/tests/main.nf.test 
b/subworkflows/local/samplesheet_to_channel/tests/main.nf.test new file mode 100644 index 0000000000..867d07ea2e --- /dev/null +++ b/subworkflows/local/samplesheet_to_channel/tests/main.nf.test @@ -0,0 +1,66 @@ +nextflow_workflow { + + name "Test Workflow SAMPLESHEET_TO_CHANNEL" + script "../main.nf" + workflow "SAMPLESHEET_TO_CHANNEL" + + test("Should run without failures") { + when { + params { + } + workflow { + """ + // Inputs are positional: they must follow the `take:` order of SAMPLESHEET_TO_CHANNEL (13-element samplesheet tuple first) + input[0] = Channel.of([ + ['patient':'test', 'sample':'test', + 'sex':'XX', 'status':0, 'lane':'test_L1'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + [], [], [], [], [], [], [], [], [], [] + ]) + input[1] = 'bwa-mem' // aligner + input[2] = [] // ascat_alleles + input[3] = [] // ascat_loci + input[4] = [] // ascat_loci_gc + input[5] = [] // ascat_loci_rt + input[6] = [] // bcftools_annotations + input[7] = [] // bcftools_annotations_tbi + input[8] = [] // bcftools_columns + input[9] = [] // bcftools_header_lines + input[10] = false // build_only_index + input[11] = [] // dbsnp + input[12] = [] // fasta + input[13] = [] // germline_resource + input[14] = [] // intervals + input[15] = false // joint_germline + input[16] = false // joint_mutect2 + input[17] = [] // known_indels + input[18] = [] // known_snps + input[19] = false // no_intervals + input[20] = [] // pon + input[21] = 'variant' // sentieon_dnascope_emit_mode + input[22] = 'variant' // sentieon_haplotyper_emit_mode + input[23] = '' // seq_center + input[24] = 'ILLUMINA' // seq_platform + input[25] = 'baserecalibrator' // skip_tools + input[26] = [] // snpeff_cache + input[27] = 'WBcel235.99' // snpeff_db + input[28] = 'mapping' // step + input[29] = 'strelka' // tools + input[30] = [] // umi_length + input[31] = [] // umi_location + input[32] = false // umi_in_read_header + input[33] = [] // umi_read_structure + input[34] = false // wes + """ + } + } + + then { + assert workflow.success 
+ assert snapshot(workflow.out).match() + } + + } + +} diff --git a/subworkflows/local/samplesheet_to_channel/tests/main.nf.test.snap b/subworkflows/local/samplesheet_to_channel/tests/main.nf.test.snap new file mode 100644 index 0000000000..19fcc95d66 --- /dev/null +++ b/subworkflows/local/samplesheet_to_channel/tests/main.nf.test.snap @@ -0,0 +1,51 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + { + "patient": "test", + "sample": "test", + "sex": "XX", + "status": 0, + "lane": "test_L1", + "id": "test-test_L1", + "data_type": "fastq_gz", + "num_lanes": 1, + "size": 1 + }, + [ + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz" + ] + ] + ], + "input_sample": [ + [ + { + "patient": "test", + "sample": "test", + "sex": "XX", + "status": 0, + "lane": "test_L1", + "id": "test-test_L1", + "data_type": "fastq_gz", + "num_lanes": 1, + "size": 1 + }, + [ + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz" + ] + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-04T10:25:14.620549" + } +} diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf deleted file mode 100644 index decaba67d6..0000000000 --- a/subworkflows/local/tumor_variant_calling.nf +++ /dev/null @@ -1,261 +0,0 @@ -// -// TUMOR VARIANT CALLING -// Should be only run on patients without normal sample -// - - -include { BGZIP as BGZIP_FREEBAYES } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_MANTA_SMALL_INDELS } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_MANTA_SV } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_MANTA_TUMOR } from '../../modules/local/bgzip' -include { BGZIP as 
BGZIP_STRELKA } from '../../modules/local/bgzip' -include { BGZIP as BGZIP_STRELKA_GENOME } from '../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_VCF_FREEBAYES } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_VCF_MANTA_SMALL_INDELS } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_VCF_MANTA_SV } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_VCF_MANTA_TUMOR } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_VCF_STRELKA } from '../../modules/local/concat_vcf/main' -include { CONCAT_VCF as CONCAT_VCF_STRELKA_GENOME } from '../../modules/local/concat_vcf/main' -include { FREEBAYES } from '../../modules/nf-core/modules/freebayes/main' -include { GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk_tumor_only_somatic_variant_calling/main' -include { MANTA_TUMORONLY } from '../../modules/nf-core/modules/manta/tumoronly/main' -include { STRELKA_GERMLINE as STRELKA_TUMORONLY } from '../../modules/nf-core/modules/strelka/germline/main' - -workflow TUMOR_ONLY_VARIANT_CALLING { - take: - tools // Mandatory, list of tools to apply - cram_recalibrated // channel: [mandatory] cram - dbsnp // channel: [mandatory] dbsnp - dbsnp_tbi // channel: [mandatory] dbsnp_tbi - dict // channel: [mandatory] dict - fasta // channel: [mandatory] fasta - fasta_fai // channel: [mandatory] fasta_fai - intervals // channel: [mandatory] intervals/target regions - intervals_bed_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed - intervals_bed_combine_gz_tbi // channel: [mandatory] intervals/target regions index zipped and indexed - intervals_bed_combine_gz // channel: [mandatory] intervals/target regions index zipped and indexed in one file - num_intervals // val: number of intervals that are used to parallelize exection, either based on capture kit or GATK recommended for WGS - no_intervals - germline_resource - 
germline_resource_tbi // channel - panel_of_normals - panel_of_normals_tbi - - - main: - - if(!tools) tools = "" - - ch_versions = Channel.empty() - freebayes_vcf = Channel.empty() - manta_vcf = Channel.empty() - mutect2_vcf = Channel.empty() - strelka_vcf = Channel.empty() - - cram_recalibrated.combine(intervals).map{ meta, cram, crai, intervals -> - sample = meta.sample - new_intervals = intervals.baseName != "no_intervals" ? intervals : [] - id = new_intervals ? sample + "_" + new_intervals.baseName : sample - new_new_meta = [ id: id, sample: meta.sample, gender: meta.gender, status: meta.status, patient: meta.patient ] - [new_new_meta, cram, crai, new_intervals] - }.set{cram_recalibrated_intervals} - - cram_recalibrated.combine(intervals_bed_gz_tbi) - .map{ meta, cram, crai, bed, tbi -> - sample = meta.sample - new_bed = bed.simpleName != "no_intervals" ? bed : [] - new_tbi = tbi.simpleName != "no_intervals" ? tbi : [] - id = new_bed ? sample + "_" + new_bed.simpleName : sample - new_meta = [ id: id, sample: meta.sample, gender: meta.gender, status: meta.status, patient: meta.patient ] - [new_meta, cram, crai, new_bed, new_tbi] - }.set{cram_recalibrated_intervals_gz_tbi} - - - if (tools.contains('freebayes')){ - - cram_recalibrated.combine(intervals).map{ meta, cram, crai, intervals -> - new_meta = meta.clone() - new_meta.id = meta.sample + "_" + intervals.simpleName - new_meta.id = intervals.baseName != "no_intervals" ? meta.sample + "_" + intervals.baseName : meta.sample - intervals = intervals.baseName != "no_intervals" ? 
intervals : [] - [new_meta, cram, crai, [], [], intervals] - }.set{cram_recalibrated_intervals_freebayes} - - FREEBAYES( - cram_recalibrated_intervals_freebayes, - fasta, - fasta_fai, - [], - [], - [] - ) - ch_versions = ch_versions.mix(FREEBAYES.out.versions) - - if(no_intervals){ - TABIX_FREEBAYES(FREEBAYES.out.vcf) - freebayes_vcf_gz = FREEBAYES.out.vcf - ch_versions = ch_versions.mix(TABIX_FREEBAYES.out.versions) - }else{ - BGZIP_FREEBAYES(FREEBAYES.out.vcf) - BGZIP_FREEBAYES.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{freebayes_vcf_to_concat} - - CONCAT_VCF_FREEBAYES(freebayes_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - freebayes_vcf_gz = CONCAT_VCF_FREEBAYES.out.vcf - - ch_versions = ch_versions.mix(BGZIP_FREEBAYES.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_FREEBAYES.out.versions) - } - - freebayes_vcf = freebayes_vcf.mix(freebayes_vcf_gz) - - } - - if (tools.contains('mutect2')) { - - which_norm = [] - cram_recalibrated_intervals.map{ meta, cram, crai, intervals -> [meta, cram, crai, intervals, which_norm]}.set{cram_recalibrated_mutect2} - GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING( - cram_recalibrated_mutect2, - fasta, - fasta_fai, - dict, - germline_resource, - germline_resource_tbi, - panel_of_normals, - panel_of_normals_tbi, - num_intervals, - no_intervals, - intervals_bed_combine_gz - ) - - ch_versions = ch_versions.mix(GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.versions) - - //mutect2_vcf_tbi = mutect2_vcf_tbi.mix(GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING.out.mutect2_vcf_gz_tbi) - } - - if (tools.contains('manta')){ - //TODO: Research if splitting by intervals is ok, we pretend for now it is fine. 
Seems to be the consensus on upstream modules implementaiton too - - MANTA_TUMORONLY( - cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai - ) - - ch_versions = ch_versions.mix(MANTA_TUMORONLY.out.versions) - - if(no_intervals){ - manta_candidate_small_indels_vcf = MANTA_TUMORONLY.out.candidate_small_indels_vcf - manta_candidate_sv_vcf = MANTA_TUMORONLY.out.candidate_sv_vcf - manta_tumor_sv_vcf = MANTA_TUMORONLY.out.tumor_sv_vcf - }else{ - - BGZIP_MANTA_SV(MANTA_TUMORONLY.out.candidate_small_indels_vcf) - BGZIP_MANTA_SMALL_INDELS(MANTA_TUMORONLY.out.candidate_sv_vcf) - BGZIP_MANTA_TUMOR(MANTA_TUMORONLY.out.tumor_sv_vcf) - - BGZIP_MANTA_SV.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_sv_vcf_to_concat} - - BGZIP_MANTA_SMALL_INDELS.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_small_indels_vcf_to_concat} - - BGZIP_MANTA_TUMOR.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{manta_tumor_sv_vcf_to_concat} - - CONCAT_VCF_MANTA_SV(manta_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - CONCAT_VCF_MANTA_SMALL_INDELS(manta_small_indels_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - CONCAT_VCF_MANTA_TUMOR(manta_tumor_sv_vcf_to_concat, fasta_fai, intervals_bed_combine_gz) - - manta_candidate_small_indels_vcf = CONCAT_VCF_MANTA_SV.out.vcf - manta_candidate_sv_vcf = CONCAT_VCF_MANTA_SMALL_INDELS.out.vcf - manta_tumor_sv_vcf = CONCAT_VCF_MANTA_TUMOR.out.vcf - - ch_versions = ch_versions.mix(BGZIP_MANTA_SV.out.versions) - ch_versions = ch_versions.mix(BGZIP_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(BGZIP_MANTA_TUMOR.out.versions) - - ch_versions = ch_versions.mix(CONCAT_VCF_MANTA_SV.out.versions) - ch_versions = 
ch_versions.mix(CONCAT_VCF_MANTA_SMALL_INDELS.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_MANTA_TUMOR.out.versions) - - } - - manta_vcf = manta_vcf.mix(manta_candidate_small_indels_vcf, manta_candidate_sv_vcf, manta_tumor_sv_vcf) - } - - if (tools.contains('strelka')) { - //TODO: research if multiple targets can be provided: waiting for reply - - STRELKA_TUMORONLY( - cram_recalibrated_intervals_gz_tbi, - fasta, - fasta_fai - ) - - ch_versions = ch_versions.mix(STRELKA_TUMORONLY.out.versions) - - if(no_intervals){ - strelka_vcf_gz = STRELKA_TUMORONLY.out.vcf - strelka_genome_vcf_gz = STRELKA_TUMORONLY.out.genome_vcf - - }else{ - BGZIP_STRELKA(STRELKA_TUMORONLY.out.vcf) - BGZIP_STRELKA_GENOME(STRELKA_TUMORONLY.out.genome_vcf) - - BGZIP_STRELKA.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{strelka_vcf_to_concat} - - BGZIP_STRELKA_GENOME.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals) - .set{strelka_genome_vcf_to_concat} - - CONCAT_VCF_STRELKA(strelka_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - CONCAT_VCF_STRELKA_GENOME(strelka_genome_vcf_to_concat,fasta_fai, intervals_bed_combine_gz) - - strelka_vcf_gz = CONCAT_VCF_STRELKA.out.vcf - strelka_genome_vcf_gz = CONCAT_VCF_STRELKA_GENOME.out.vcf - - ch_versions = ch_versions.mix(BGZIP_STRELKA.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_STRELKA.out.versions) - } - - strelka_vcf = strelka_vcf.mix(strelka_vcf_gz,strelka_genome_vcf_gz ) - } - - - // if (tools.contains('tiddit')){ - // } - - emit: - versions = ch_versions - - freebayes_vcf - manta_vcf - mutect2_vcf - strelka_vcf -} diff --git a/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf new file mode 100644 index 0000000000..06bf0fb8d8 --- /dev/null +++ 
b/subworkflows/local/utils_nfcore_sarek_pipeline/main.nf @@ -0,0 +1,365 @@
+// Subworkflow with functionality specific to the nf-core/sarek pipeline
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+include { UTILS_NFSCHEMA_PLUGIN   } from '../../nf-core/utils_nfschema_plugin'
+include { paramsSummaryMap        } from 'plugin/nf-schema'
+include { samplesheetToList       } from 'plugin/nf-schema'
+include { paramsHelp              } from 'plugin/nf-schema'
+include { completionEmail         } from '../../nf-core/utils_nfcore_pipeline'
+include { completionSummary       } from '../../nf-core/utils_nfcore_pipeline'
+include { UTILS_NFCORE_PIPELINE   } from '../../nf-core/utils_nfcore_pipeline'
+include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline'
+include { SAMPLESHEET_TO_CHANNEL  } from '../samplesheet_to_channel'
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    SUBWORKFLOW TO INITIALISE PIPELINE
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// Runs the generic nf-core start-up subworkflows, applies the sarek-specific parameter
+// checks and turns the samplesheet (or the restart CSV chosen by retrieveInput) into the
+// channel emitted as `samplesheet`.
+workflow PIPELINE_INITIALISATION {
+    take:
+    version           // boolean: Display version and exit
+    validate_params   // boolean: Boolean whether to validate parameters against the schema at runtime
+    nextflow_cli_args // array: List of positional nextflow CLI args
+    outdir            // string: The output directory where the results will be saved
+    input             // string: Path to input samplesheet
+    help              // boolean: Display help message and exit
+    help_full         // boolean: Show the full help message
+    show_hidden       // boolean: Show hidden parameters in the help message
+
+    main:
+
+    versions = channel.empty()
+
+    // Print version and exit if required and dump pipeline parameters to JSON file
+    UTILS_NEXTFLOW_PIPELINE(
+        version,
+        true,
+        outdir,
+        workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1,
+    )
+
+    // Validate parameters and generate parameter summary to stdout
+    //
+    // NOTE(review): runs of spaces inside this banner look collapsed in this copy of the
+    // file - compare with upstream before relying on the logo layout.
+    def before_text = """
+-\033[2m----------------------------------------------------\033[0m-
+ \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m
+\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m
+\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m
+\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m
+ \033[0;32m`._,._,\'\033[0m
+\033[0;37m ____\033[0m
+\033[0;37m .´ _ `.\033[0m
+\033[0;37m / \033[0;32m|\\\033[0m`-_ \\\033[0m \033[0;34m __ __ ___ \033[0m
+\033[0;37m | \033[0;32m| \\\033[0m `-|\033[0m \033[0;34m|__` /\\ |__) |__ |__/\033[0m
+\033[0;37m \\ \033[0;32m| \\\033[0m /\033[0m \033[0;34m.__| /¯¯\\ | \\ |___ | \\\033[0m
+\033[0;37m `\033[0;32m|\033[0m____\033[0;32m\\\033[0m´\033[0m
+
+\033[0;35m nf-core/sarek ${workflow.manifest.version}\033[0m
+-\033[2m----------------------------------------------------\033[0m-
+"""
+    // The manifest DOI may be unset; fall back to an empty string so tokenize() is never called on null
+    def manifest_doi = workflow.manifest.doi ?: ""
+    def after_text = """${manifest_doi ? "\n* The pipeline\n" : ""}${manifest_doi.tokenize(",").collect { doi -> " https://doi.org/${doi.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest_doi ? "\n" : ""}
+* The nf-core framework
+ https://doi.org/10.1038/s41587-020-0439-x
+
+* Software dependencies
+ https://github.com/nf-core/sarek/blob/master/CITATIONS.md
+"""
+    // Usage example shown in the help text; the <...> placeholders mark values the user has to supply
+    command = "nextflow run ${workflow.manifest.name} -profile <docker/singularity/.../institute> --input samplesheet.csv --outdir <OUTDIR>"
+
+    UTILS_NFSCHEMA_PLUGIN(
+        workflow,
+        validate_params,
+        null,
+        help,
+        help_full,
+        show_hidden,
+        before_text,
+        after_text,
+        command,
+    )
+
+    // Check config provided to the pipeline
+    UTILS_NFCORE_PIPELINE(nextflow_cli_args)
+
+    // Custom validation for pipeline parameters
+    validateInputParameters()
+
+    // Check input path parameters to see if they exist
+    // NOTE(review): nothing in this workflow reads checkPathParamList after it is built,
+    // so no existence check actually happens here - confirm whether that is intended.
+    def checkPathParamList = [
+        params.ascat_alleles,
+        params.ascat_loci,
+        params.ascat_loci_gc,
+        params.ascat_loci_rt,
+        params.bwa,
+        params.bwamem2,
+        params.bcftools_annotations,
+        params.bcftools_annotations_tbi,
+        params.bcftools_columns,
+        params.bcftools_header_lines,
+        params.cf_chrom_len,
+        params.chr_dir,
+        params.cnvkit_reference,
+        params.dbnsfp,
+        params.dbnsfp_tbi,
+        params.dbsnp,
+        params.dbsnp_tbi,
+        params.dict,
+        params.dragmap,
+        params.fasta,
+        params.fasta_fai,
+        params.germline_resource,
+        params.germline_resource_tbi,
+        params.input,
+        params.intervals,
+        params.known_indels,
+        params.known_indels_tbi,
+        params.known_snps,
+        params.known_snps_tbi,
+        params.mappability,
+        params.multiqc_config,
+        params.ngscheckmate_bed,
+        params.pon,
+        params.pon_tbi,
+        params.sentieon_dnascope_model,
+        params.spliceai_indel,
+        params.spliceai_indel_tbi,
+        params.spliceai_snv,
+        params.spliceai_snv_tbi,
+    ]
+
+    // only check if we are using the tools
+    // (the comma-separated --tools string is split once; an unset value yields an empty list)
+    def selected_tools = params.tools ? (params.tools.split(',') as List) : []
+    if (selected_tools.contains('snpeff') || selected_tools.contains('merge')) {
+        checkPathParamList.add(params.snpeff_cache)
+    }
+    if (selected_tools.contains('vep') || selected_tools.contains('merge')) {
+        checkPathParamList.add(params.vep_cache)
+    }
+
+    // Without --input (and when not only building indices) pick the CSV written by a previous run for this step
+    params.input_restart = retrieveInput((!params.build_only_index && !input), params.step, params.outdir)
+
+    ch_from_samplesheet = params.build_only_index
+        ? channel.empty()
+        : input
+            ? channel.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input.json"))
+            : channel.fromList(samplesheetToList(params.input_restart, "${projectDir}/assets/schema_input.json"))
+
+    SAMPLESHEET_TO_CHANNEL(
+        ch_from_samplesheet,
+        params.aligner,
+        params.ascat_alleles,
+        params.ascat_loci,
+        params.ascat_loci_gc,
+        params.ascat_loci_rt,
+        params.bcftools_annotations,
+        params.bcftools_annotations_tbi,
+        params.bcftools_columns,
+        params.bcftools_header_lines,
+        params.build_only_index,
+        params.dbsnp,
+        params.fasta,
+        params.germline_resource,
+        params.intervals,
+        params.joint_germline,
+        params.joint_mutect2,
+        params.known_indels,
+        params.known_snps,
+        params.no_intervals,
+        params.pon,
+        params.sentieon_dnascope_emit_mode,
+        params.sentieon_haplotyper_emit_mode,
+        params.seq_center,
+        params.seq_platform,
+        params.skip_tools,
+        params.snpeff_cache,
+        params.snpeff_db,
+        params.step,
+        params.tools,
+        params.umi_length,
+        params.umi_location,
+        params.umi_in_read_header,
+        params.umi_read_structure,
+        params.wes,
+    )
+
+    emit:
+    samplesheet = SAMPLESHEET_TO_CHANNEL.out.input_sample
+    versions
+}
+ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW FOR PIPELINE COMPLETION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PIPELINE_COMPLETION { + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + multiqc_report // string: Path to MultiQC report + + main: + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + def
multiqc_reports = multiqc_report.toList() + + // Completion email and summary + workflow.onComplete { + if (email || email_on_fail) { + completionEmail( + summary_params, + email, + email_on_fail, + plaintext_email, + outdir, + monochrome_logs, + multiqc_reports.getVal(), + ) + } + + completionSummary(monochrome_logs) + } + + workflow.onError { + log.error("Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting") + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/
+// Entry point for the pipeline-specific parameter checks: runs each check in turn,
+// any of which aborts the run through error() when its condition is violated.
+def validateInputParameters() {
+    genomeExistsError()
+    sparkAndBam()
+}
+
+// Abort when --genome names a key that is absent from the configured params.genomes map.
+// Does nothing when either params.genomes or params.genome is unset.
+def genomeExistsError() {
+    def lookup_possible = params.genomes && params.genome
+    if (!lookup_possible) {
+        return
+    }
+    if (params.genomes.containsKey(params.genome)) {
+        return
+    }
+    // The message lists every available key so the user can correct the typo
+    def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + " Currently, the available genome keys are:\n" + " ${params.genomes.keySet().join(", ")}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+    error(error_string)
+}
+
+// Abort when use_gatk_spark, save_mapped and save_output_as_bam are all enabled together.
+// The message is written to stderr first and then raised through error().
+def sparkAndBam() {
+    def all_three_set = params.use_gatk_spark && params.save_mapped && params.save_output_as_bam
+    if (!all_three_set) {
+        return
+    }
+    def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " The --use_gatk_spark option is not compatible with --save_mapped and --save_output_as_bam.\n" + " If you want to save your bam files please swap to the normal gatk implementation.\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+    System.err.println(error_string)
+    error(error_string)
+}
+
+// Generate methods description for MultiQC
+// Returns the in-text citation sentence: the entries below joined with single spaces and trimmed.
+def toolCitationText() {
+    // TODO nf-core: Optionally add in-text citation tools to this list.
+    // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "",
+    // Uncomment function in methodsDescriptionText to render in MultiQC report
+    def citation_text = [
+        "Tools used in the workflow included:",
+        "FastQC (Andrews 2010),",
+        "MultiQC (Ewels et al. 2016)",
+        ".",
+    ].join(' ').trim()
+
+    return citation_text
+}
+
+// Returns the bibliography as HTML list items (<li>...</li>) joined with single spaces and trimmed.
+def toolBibliographyText() {
+    // TODO nf-core: Optionally add bibliographic entries to this list.
+    // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "<li>Author (2023) Pub name, Journal, DOI</li>" : "",
+    // Uncomment function in methodsDescriptionText to render in MultiQC report
+    def reference_text = [
+        "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>",
+        "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>",
+    ].join(' ').trim()
+
+    return reference_text
+}
+
+// Renders the MultiQC methods-description template.
+//   mqc_methods_yaml: object whose `.text` holds the template source
+//   returns: the template rendered against a map holding the workflow metadata, the
+//            manifest, the DOI text and the (currently empty) tool citation fields
+def methodsDescriptionText(mqc_methods_yaml) {
+    // Convert to a named map so can be used as with familiar NXF ${workflow} variable syntax in the MultiQC YML file
+    def meta = [:]
+    meta.workflow = workflow.toMap()
+    meta["manifest_map"] = workflow.manifest.toMap()
+
+    // Pipeline DOI
+    if (meta.manifest_map.doi) {
+        // Handles multiple comma-separated DOIs
+        // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers
+        // Removing ` ` since the manifest.doi is a string and not a proper list
+        // NOTE(review): other markup in this file was stripped; upstream may wrap each DOI in a link - confirm
+        def doi_refs = meta.manifest_map.doi.tokenize(",").collect { doi_ref ->
+            "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")})"
+        }
+        // join() yields "" for an empty list, where trimming a trailing ", " by substring would throw
+        meta["doi_text"] = doi_refs.join(", ")
+    }
+    else {
+        meta["doi_text"] = ""
+    }
+    meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of the pipeline version used.</li>"
+
+    // Tool references
+    meta["tool_citations"] = ""
+    meta["tool_bibliography"] = ""
+
+    // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled!
+    // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".")
+    // meta["tool_bibliography"] = toolBibliographyText()
+
+
+    def methods_text = mqc_methods_yaml.text
+
+    def engine = new groovy.text.SimpleTemplateEngine()
+    def description_html = engine.createTemplate(methods_text).make(meta)
+
+    return description_html.toString()
+}
+
+// retrieveInput +def retrieveInput(need_input, step, outdir) { + def input = null + if (need_input) { + if (step == 'mapping') { + error("Can't start ${step} step without samplesheet") + } + else if (step == 'markduplicates') { + log.warn("Using file ${outdir}/csv/mapped.csv") + input = outdir + "/csv/mapped.csv" + } + else if (step == 'prepare_recalibration') { + log.warn("Using file ${outdir}/csv/markduplicates_no_table.csv") + input = outdir + "/csv/markduplicates_no_table.csv" + } + else if (step == 'recalibrate') { + log.warn("Using file ${outdir}/csv/markduplicates.csv") + input = outdir + "/csv/markduplicates.csv" + } + else if (step == 'variant_calling') { + log.warn("Using file ${outdir}/csv/recalibrated.csv") + input = outdir + "/csv/recalibrated.csv" + } + else if (step == 'annotate') { + log.warn("Using file ${outdir}/csv/variantcalled.csv") + input = outdir + "/csv/variantcalled.csv" + } + else { + log.warn("Please provide an input samplesheet to the pipeline e.g.
'--input samplesheet.csv'") + error("Unknown step ${step}") + } + } + return input +}
diff --git a/subworkflows/local/vcf_annotate_all/main.nf b/subworkflows/local/vcf_annotate_all/main.nf new file mode 100644 index 0000000000..72f9dfe061 --- /dev/null +++ b/subworkflows/local/vcf_annotate_all/main.nf @@ -0,0 +1,86 @@
+//
+// ANNOTATION
+//
+
+include { BCFTOOLS_ANNOTATE                    } from '../../../modules/nf-core/bcftools/annotate'
+include { ENSEMBLVEP_VEP                       } from '../../../modules/nf-core/ensemblvep/vep'
+include { ENSEMBLVEP_VEP as VCF_ANNOTATE_MERGE } from '../../../modules/nf-core/ensemblvep/vep'
+include { VCF_ANNOTATE_SNPEFF                  } from '../../nf-core/vcf_annotate_snpeff'
+include { SNPSIFT_ANNMEM                       } from '../../../modules/nf-core/snpsift/annmem'
+
+// Runs every annotator named in the comma-separated `tools` string on the incoming VCFs
+// and mixes all annotated outputs into `vcf_ann`.
+workflow VCF_ANNOTATE_ALL {
+    take:
+    vcf // channel: [ val(meta), vcf ]
+    fasta
+    tools // Mandatory, list of tools to apply
+    snpeff_db
+    snpeff_cache
+    vep_genome
+    vep_species
+    vep_cache_version
+    vep_cache
+    vep_extra_files
+    bcftools_annotations
+    bcftools_annotations_index
+    bcftools_columns
+    bcftools_header_lines
+    snpsift_db // channel: [[databases], [tbis], [vardbs], [fields], [prefixes]]
+
+    main:
+    vcf_ann = channel.empty()
+    tab_ann = channel.empty()
+    json_ann = channel.empty()
+    versions = channel.empty()
+
+    // Split the tool string once; an unset value yields an empty list so no annotator is selected
+    def tool_list = tools ? (tools.split(',') as List) : []
+
+    if (tool_list.contains('bcfann')) {
+        BCFTOOLS_ANNOTATE(
+            vcf.map { meta, vcf_ -> [meta, vcf_, []] }.combine(bcftools_annotations).combine(bcftools_annotations_index),
+            bcftools_columns,
+            bcftools_header_lines,
+            [],
+        )
+
+        vcf_ann = vcf_ann.mix(BCFTOOLS_ANNOTATE.out.vcf.join(BCFTOOLS_ANNOTATE.out.tbi, failOnDuplicate: true, failOnMismatch: true))
+        versions = versions.mix(BCFTOOLS_ANNOTATE.out.versions)
+    }
+
+    // 'merge' needs the snpEff output as its input, so snpEff also runs when only 'merge' is requested
+    if (tool_list.contains('merge') || tool_list.contains('snpeff')) {
+        VCF_ANNOTATE_SNPEFF(vcf, snpeff_db, snpeff_cache)
+
+        vcf_ann = vcf_ann.mix(VCF_ANNOTATE_SNPEFF.out.vcf_tbi)
+    }
+
+    if (tool_list.contains('merge')) {
+        // VEP run on top of the snpEff-annotated VCF; the third tuple element is passed empty
+        vcf_ann_for_merge = VCF_ANNOTATE_SNPEFF.out.vcf_tbi.map { meta, vcf_, _tbi -> [meta, vcf_, []] }
+        VCF_ANNOTATE_MERGE(vcf_ann_for_merge, vep_genome, vep_species, vep_cache_version, vep_cache, fasta, vep_extra_files)
+
+        vcf_ann = vcf_ann.mix(VCF_ANNOTATE_MERGE.out.vcf.join(VCF_ANNOTATE_MERGE.out.tbi, failOnDuplicate: true, failOnMismatch: true))
+    }
+
+    if (tool_list.contains('vep')) {
+        vcf_for_vep = vcf.map { meta, vcf_ -> [meta, vcf_, []] }
+        ENSEMBLVEP_VEP(vcf_for_vep, vep_genome, vep_species, vep_cache_version, vep_cache, fasta, vep_extra_files)
+
+        vcf_ann = vcf_ann.mix(ENSEMBLVEP_VEP.out.vcf.join(ENSEMBLVEP_VEP.out.tbi, failOnDuplicate: true, failOnMismatch: true))
+        tab_ann = tab_ann.mix(ENSEMBLVEP_VEP.out.tab)
+        json_ann = json_ann.mix(ENSEMBLVEP_VEP.out.json)
+    }
+
+    // SnpSift runs on all final annotated outputs
+    // If no other annotators were used, fall back to original vcf
+    if (tool_list.contains('snpsift')) {
+        def has_other_annotators = ['merge', 'snpeff', 'vep', 'bcfann'].any { tool -> tool_list.contains(tool) }
+        // With 'merge' only the merged VCF is used; otherwise everything collected in vcf_ann so far
+        def snpsift_input = tool_list.contains('merge')
+            ? VCF_ANNOTATE_MERGE.out.vcf.map { meta, vcf_ -> [meta, vcf_, []] }
+            : (has_other_annotators ? vcf_ann.map { meta, vcf_, _tbi -> [meta, vcf_, []] } : vcf.map { meta, vcf_ -> [meta, vcf_, []] })
+
+        SNPSIFT_ANNMEM(snpsift_input, snpsift_db)
+        vcf_ann = vcf_ann.mix(SNPSIFT_ANNMEM.out.vcf.join(SNPSIFT_ANNMEM.out.tbi, failOnDuplicate: true, failOnMismatch: true))
+    }
+
+    // NOTE(review): only BCFTOOLS_ANNOTATE versions are mixed into `versions` here - confirm
+    // that the other annotators report their versions through another route.
+    emit:
+    vcf_ann  // channel: [ val(meta), vcf.gz, vcf.gz.tbi ]
+    tab_ann
+    json_ann
+    versions // path: versions.yml
+}
diff --git a/subworkflows/local/vcf_concatenate_germline/main.nf b/subworkflows/local/vcf_concatenate_germline/main.nf new file mode 100644 index 0000000000..31dc6de01b --- /dev/null +++ b/subworkflows/local/vcf_concatenate_germline/main.nf @@ -0,0 +1,38 @@ +// +// CONCATENATE Germline VCFs +// + +// Concatenation of germline vcf-files +include { ADD_INFO_TO_VCF } from '../../../modules/local/add_info_to_vcf' +include { BCFTOOLS_CONCAT as GERMLINE_VCFS_CONCAT } from '../../../modules/nf-core/bcftools/concat' +include { BCFTOOLS_SORT as GERMLINE_VCFS_CONCAT_SORT } from '../../../modules/nf-core/bcftools/sort' +include { TABIX_BGZIPTABIX as TABIX_EXT_VCF } from '../../../modules/nf-core/tabix/bgziptabix' +include { TABIX_TABIX as TABIX_GERMLINE_VCFS_CONCAT_SORT } from '../../../modules/nf-core/tabix/tabix' + +workflow CONCATENATE_GERMLINE_VCFS { + take: + vcfs + + main: + versions = channel.empty() + + // Concatenate vcf-files + ADD_INFO_TO_VCF(vcfs) + TABIX_EXT_VCF(ADD_INFO_TO_VCF.out.vcf) + + // Gather vcfs and vcf-tbis for concatenating germline-vcfs + germline_vcfs_with_tbis = TABIX_EXT_VCF.out.gz_index.groupTuple() + + GERMLINE_VCFS_CONCAT(germline_vcfs_with_tbis) + GERMLINE_VCFS_CONCAT_SORT(GERMLINE_VCFS_CONCAT.out.vcf) + + // Gather versions of all tools used + versions = versions.mix(ADD_INFO_TO_VCF.out.versions) + versions = versions.mix(GERMLINE_VCFS_CONCAT.out.versions) + versions = versions.mix(GERMLINE_VCFS_CONCAT_SORT.out.versions) + + emit: + vcfs = GERMLINE_VCFS_CONCAT_SORT.out.vcf // concatenated vcfs + tbis = GERMLINE_VCFS_CONCAT_SORT.out.tbi // matching tbis +
versions // channel: [ versions.yml ] +}
diff --git a/subworkflows/local/vcf_consensus/main.nf b/subworkflows/local/vcf_consensus/main.nf new file mode 100644 index 0000000000..aa18c0c939 --- /dev/null +++ b/subworkflows/local/vcf_consensus/main.nf @@ -0,0 +1,79 @@
+//
+// Intersect VCFs and merge consensus variants from all callers
+//
+
+include { BCFTOOLS_ISEC        } from '../../../modules/nf-core/bcftools/isec'
+include { BCFTOOLS_CONCAT      } from '../../../modules/nf-core/bcftools/concat'
+include { CONSENSUS_FROM_SITES } from '../../../modules/local/consensus_from_sites'
+
+// Groups the VCFs of all callers per sample, intersects them with BCFTOOLS_ISEC and
+// builds one consensus VCF per group from the resulting sites.txt.
+workflow CONSENSUS {
+
+    take:
+    vcfs // [meta, vcf ,tbi]
+
+    main:
+    ch_versions = Channel.empty()
+
+    // Route somatic Strelka entries away from everything else.
+    // A Strelka entry that carries meta.tumor_id is treated as somatic (tumor-normal pair).
+    ch_by_origin = vcfs.branch { meta, _vcf, _tbi ->
+        strelka_somatic: meta.variantcaller == 'strelka' && meta.tumor_id
+        other: true
+    }
+
+    // Somatic Strelka delivers two files per sample (SNVs and indels) whose metas differ only
+    // in `filename`; dropping that key gives both the same grouping key.
+    ch_strelka_pairs = ch_by_origin.strelka_somatic
+        .map { meta, vcf, tbi -> [meta - meta.subMap('filename'), vcf, tbi] }
+        .groupTuple(size: 2)
+
+    // Concatenate the SNV and indel file of each somatic Strelka sample
+    BCFTOOLS_CONCAT(ch_strelka_pairs)
+    ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions)
+
+    ch_strelka_concatenated = BCFTOOLS_CONCAT.out.vcf.join(BCFTOOLS_CONCAT.out.tbi)
+
+    // Put the concatenated Strelka VCFs back with the remaining callers.
+    // Each VCF travels together with its caller name so the pairing survives the grouping.
+    ch_consensus_in = ch_by_origin.other
+        .mix(ch_strelka_concatenated)
+        .map { meta, vcf, tbi ->
+            def group_meta = meta - meta.subMap('variantcaller', 'contamination', 'filename', 'data_type', 'num_intervals')
+            [group_meta, [vcf, meta.variantcaller], tbi]
+        }
+        // TODO: blocking operation unless we learn how many variantcallers were
+        // specified - also depends on whether this is n, t, or nt and how many
+        // variantcallers are actually executed
+        .groupTuple()
+        .map { meta, vcf_caller_pairs, tbis ->
+            // Order by VCF file name so the isec input order is predictable;
+            // the callers list is emitted in that same order
+            def ordered_pairs = vcf_caller_pairs.sort { left, right -> left[0].name <=> right[0].name }
+            def ordered_vcfs = ordered_pairs.collect { pair -> pair[0] }
+            def ordered_callers = ordered_pairs.collect { pair -> pair[1] }
+            [meta + [callers: ordered_callers], ordered_vcfs, tbis]
+        }
+
+
+    BCFTOOLS_ISEC(ch_consensus_in)
+    ch_versions = ch_versions.mix(BCFTOOLS_ISEC.out.versions)
+
+    // Keep only isec results that contain a non-empty sites.txt (i.e. some consensus variants)
+    ch_isec_with_results = BCFTOOLS_ISEC.out.results
+        .filter { _meta, dir ->
+            def sites_txt = dir.resolve('sites.txt')
+            sites_txt.exists() && sites_txt.size() > 0
+        }
+
+    // Build the consensus VCF from sites.txt with caller presence info
+    // Versions are collected via topic channel
+    CONSENSUS_FROM_SITES(ch_isec_with_results)
+
+    emit:
+    versions = ch_versions
+    vcfs = CONSENSUS_FROM_SITES.out.vcf
+    tbis = CONSENSUS_FROM_SITES.out.tbi
+
+}
diff --git a/subworkflows/local/vcf_normalization/main.nf b/subworkflows/local/vcf_normalization/main.nf new file mode 100644 index 0000000000..afe7b350b2 --- /dev/null +++ b/subworkflows/local/vcf_normalization/main.nf @@ -0,0 +1,39 @@ +// Normalize all unannotated VCFs + +// Import modules +include { ADD_INFO_TO_VCF } from '../../../modules/local/add_info_to_vcf' +include { BCFTOOLS_NORM as VCFS_NORM } from '../../../modules/nf-core/bcftools/norm' +include { BCFTOOLS_SORT as VCFS_NORM_SORT } from '../../../modules/nf-core/bcftools/sort' +include { TABIX_BGZIPTABIX as TABIX_EXT_VCF } from '../../../modules/nf-core/tabix/bgziptabix' + +// Workflow to normalize, compress, and index VCF files +workflow NORMALIZE_VCFS { + take: + vcfs + fasta + + main: + versions = channel.empty() + + // Add additional information to VCF files + ADD_INFO_TO_VCF(vcfs) + + // Compress
the VCF files with bgzip + TABIX_EXT_VCF(ADD_INFO_TO_VCF.out.vcf) + + // Normalize the VCF files with BCFTOOLS_NORM + VCFS_NORM(TABIX_EXT_VCF.out.gz_index, fasta) + + // Sort the normalized VCF files + VCFS_NORM_SORT(VCFS_NORM.out.vcf) + + // Gather versions of all tools used + versions = versions.mix(ADD_INFO_TO_VCF.out.versions) + versions = versions.mix(VCFS_NORM.out.versions) + versions = versions.mix(VCFS_NORM_SORT.out.versions) + + emit: + vcfs = VCFS_NORM_SORT.out.vcf // normalized vcfs + tbis = VCFS_NORM_SORT.out.tbi // matching tbis + versions // Channel: [versions.yml] +} diff --git a/subworkflows/local/vcf_qc_bcftools_vcftools/main.nf b/subworkflows/local/vcf_qc_bcftools_vcftools/main.nf new file mode 100644 index 0000000000..ab69bdeb15 --- /dev/null +++ b/subworkflows/local/vcf_qc_bcftools_vcftools/main.nf @@ -0,0 +1,30 @@ +include { BCFTOOLS_STATS } from '../../../modules/nf-core/bcftools/stats/main' +include { VCFTOOLS as VCFTOOLS_SUMMARY } from '../../../modules/nf-core/vcftools/main' +include { VCFTOOLS as VCFTOOLS_TSTV_COUNT } from '../../../modules/nf-core/vcftools/main' +include { VCFTOOLS as VCFTOOLS_TSTV_QUAL } from '../../../modules/nf-core/vcftools/main' + +workflow VCF_QC_BCFTOOLS_VCFTOOLS { + take: + vcf + target_bed + + main: + + versions = Channel.empty() + + BCFTOOLS_STATS(vcf.map{ meta, vcf_ -> [ meta, vcf_, [] ] }, [[:],[]], [[:],[]], [[:],[]], [[:],[]], [[:],[]]) + VCFTOOLS_TSTV_COUNT(vcf, target_bed, []) + VCFTOOLS_TSTV_QUAL(vcf, target_bed, []) + VCFTOOLS_SUMMARY(vcf, target_bed, []) + + versions = versions.mix(BCFTOOLS_STATS.out.versions) + versions = versions.mix(VCFTOOLS_TSTV_COUNT.out.versions) + + emit: + bcftools_stats = BCFTOOLS_STATS.out.stats + vcftools_tstv_counts = VCFTOOLS_TSTV_COUNT.out.tstv_count + vcftools_tstv_qual = VCFTOOLS_TSTV_QUAL.out.tstv_qual + vcftools_filter_summary = VCFTOOLS_SUMMARY.out.filter_summary + + versions +} diff --git a/subworkflows/local/vcf_variant_filtering_gatk/main.nf 
b/subworkflows/local/vcf_variant_filtering_gatk/main.nf new file mode 100644 index 0000000000..ce2b749c35 --- /dev/null +++ b/subworkflows/local/vcf_variant_filtering_gatk/main.nf @@ -0,0 +1,42 @@ +include { GATK4_CNNSCOREVARIANTS as CNNSCOREVARIANTS } from '../../../modules/nf-core/gatk4/cnnscorevariants/main' +include { GATK4_FILTERVARIANTTRANCHES as FILTERVARIANTTRANCHES } from '../../../modules/nf-core/gatk4/filtervarianttranches/main' + +workflow VCF_VARIANT_FILTERING_GATK { + + take: + vcf // meta, vcf, tbi, intervals + fasta + fasta_fai + dict + intervals_bed_combined + known_sites + known_sites_tbi + + main: + + versions = Channel.empty() + + // Don't scatter/gather by intervals, because especially for small regions (targeted or WGS), it easily fails with 0 SNPS in region + cnn_in = vcf.combine(intervals_bed_combined).map{ meta, vcf_, tbi, intervals -> [ meta, vcf_, tbi, [], intervals ] } + + CNNSCOREVARIANTS(cnn_in, fasta, fasta_fai, dict, [], []) + + FILTERVARIANTTRANCHES(CNNSCOREVARIANTS.out.vcf.join(CNNSCOREVARIANTS.out.tbi, failOnDuplicate: true, failOnMismatch: true).combine(intervals_bed_combined), known_sites, known_sites_tbi, fasta, fasta_fai, dict) + + filtered_vcf = FILTERVARIANTTRANCHES.out.vcf + // remove no longer necessary field: num_intervals + .map{ meta, vcf_ -> [ meta - meta.subMap('num_intervals'), vcf_ ] } + + filtered_tbi = FILTERVARIANTTRANCHES.out.tbi + // remove no longer necessary field: num_intervals + .map{ meta, tbi -> [ meta - meta.subMap('num_intervals'), tbi ] } + + versions = versions.mix(CNNSCOREVARIANTS.out.versions) + versions = versions.mix(FILTERVARIANTTRANCHES.out.versions) + + emit: + filtered_vcf + filtered_tbi + + versions +} diff --git a/subworkflows/local/vcf_varlociraptor_single/main.nf b/subworkflows/local/vcf_varlociraptor_single/main.nf new file mode 100644 index 0000000000..35d5efc228 --- /dev/null +++ b/subworkflows/local/vcf_varlociraptor_single/main.nf @@ -0,0 +1,143 @@ +include { BCFTOOLS_CONCAT as 
CONCAT_CALLED_CHUNKS } from '../../../modules/nf-core/bcftools/concat' +include { BCFTOOLS_SORT as SORT_CALLED_CHUNKS } from '../../../modules/nf-core/bcftools/sort' +include { BCFTOOLS_SORT as SORT_FINAL_VCF } from '../../../modules/nf-core/bcftools/sort' +include { RBT_VCFSPLIT } from '../../../modules/nf-core/rbt/vcfsplit' +include { VARLOCIRAPTOR_CALLVARIANTS } from '../../../modules/nf-core/varlociraptor/callvariants' +include { VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES } from '../../../modules/nf-core/varlociraptor/estimatealignmentproperties' +include { VARLOCIRAPTOR_PREPROCESS } from '../../../modules/nf-core/varlociraptor/preprocess' +include { YTE as FILL_SCENARIO_FILE } from '../../../modules/nf-core/yte' + +workflow VCF_VARLOCIRAPTOR_SINGLE { + take: + ch_cram + ch_fasta + ch_fasta_fai + ch_scenario + ch_vcf + val_num_chunks + val_sampletype + + main: + ch_versions = channel.empty() + + meta_map = ch_cram.map { meta, _cram, _crai -> meta + [sex_string: (meta.sex == "XX" ? "female" : "male")] } + + FILL_SCENARIO_FILE( + meta_map.combine(ch_scenario).map { meta, scenario_file -> [meta, scenario_file, [], meta] } + ) + ch_scenario_file = FILL_SCENARIO_FILE.out.rendered + + // Estimate alignment properties + VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES( + ch_cram.combine(ch_fasta).combine(ch_fasta_fai).map { meta_cram, cram, crai, _meta_fasta, fasta, _meta_fai, fai -> + [meta_cram, cram, crai, fasta, fai] + } + ) + + // + // CHUNK AND PREPROCESS GERMLINE VCF + // + RBT_VCFSPLIT( + ch_vcf, + val_num_chunks, + ) + + ch_chunked_vcfs = RBT_VCFSPLIT.out.bcfchunks + .transpose() + .map { meta, vcf_chunked -> + [ + meta + [chunk: vcf_chunked.name.split(/\./)[-2]], + vcf_chunked, + ] + } + + // Join each alignment file with its properties + ch_cram_alignment = ch_cram + .join(VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES.out.alignment_properties_json, failOnMismatch: true, failOnDuplicate: true) + .map { meta, cram, crai, json -> [meta.id, meta, cram, crai, json] } + + // 
Now combine the each chunked VCFs with the alignment data + ch_input_preprocess_chunked = ch_chunked_vcfs + .map { meta, vcf -> [meta.id, meta, vcf] } + .combine(ch_cram_alignment, by: 0) + .combine(ch_fasta) + .combine(ch_fasta_fai) + .map { _id, meta_vcf, vcf, meta_cram, cram, crai, alignment_json, _meta_fasta, fasta, _meta_fai, fasta_fai -> + [ + meta_cram + [ + variantcaller: meta_vcf.variantcaller, + postprocess: 'varlociraptor', + chunk: meta_vcf.chunk, + ], + cram, + crai, + vcf, + alignment_json, + fasta, + fasta_fai, + ] + } + + VARLOCIRAPTOR_PREPROCESS( + ch_input_preprocess_chunked + ) + + // + // CALL VARIANTS WITH VARLOCIRAPTOR + // + ch_vcfs_for_callvariants = VARLOCIRAPTOR_PREPROCESS.out.bcf + .map { meta, bcf -> + [meta.id, meta, bcf] + } + .combine( + ch_scenario_file.map { meta, scenario_file -> [meta.id, scenario_file] }, + by: 0 + ) + .map { _id, meta_normal, normal_bcf, scenario_file -> + [meta_normal, [normal_bcf], scenario_file, val_sampletype] + } + + VARLOCIRAPTOR_CALLVARIANTS( + ch_vcfs_for_callvariants + ) + + // + // SORT AND MERGE CALLED VARIANTS + // + SORT_CALLED_CHUNKS( + VARLOCIRAPTOR_CALLVARIANTS.out.bcf + ) + ch_versions = ch_versions.mix(SORT_CALLED_CHUNKS.out.versions) + + ch_sort_called_chunks_vcf = SORT_CALLED_CHUNKS.out.vcf.branch { + single: val_num_chunks <= 1 + multiple: val_num_chunks > 1 + } + + ch_sort_called_chunks_tbi = SORT_CALLED_CHUNKS.out.tbi.branch { + single: val_num_chunks <= 1 + multiple: val_num_chunks > 1 + } + + ch_vcf_tbi_chunks = ch_sort_called_chunks_vcf.multiple + .join(ch_sort_called_chunks_tbi.multiple, failOnMismatch: true, failOnDuplicate: true) + .map { meta, vcf, tbi -> + [meta - meta.subMap("chunk"), vcf, tbi] + } + .groupTuple(size: val_num_chunks) + + CONCAT_CALLED_CHUNKS(ch_vcf_tbi_chunks) + + ch_versions = ch_versions.mix(CONCAT_CALLED_CHUNKS.out.versions) + + ch_final_vcf = ch_sort_called_chunks_vcf.single.mix(CONCAT_CALLED_CHUNKS.out.vcf) + + SORT_FINAL_VCF(ch_final_vcf) + + ch_versions = 
ch_versions.mix(SORT_FINAL_VCF.out.versions) + + emit: + vcf = SORT_FINAL_VCF.out.vcf + tbi = SORT_FINAL_VCF.out.tbi + versions = ch_versions +} diff --git a/subworkflows/local/vcf_varlociraptor_somatic/main.nf b/subworkflows/local/vcf_varlociraptor_somatic/main.nf new file mode 100644 index 0000000000..dafba72787 --- /dev/null +++ b/subworkflows/local/vcf_varlociraptor_somatic/main.nf @@ -0,0 +1,294 @@ +include { BCFTOOLS_CONCAT as CONCAT_CALLED_CHUNKS } from '../../../modules/nf-core/bcftools/concat' +include { BCFTOOLS_CONCAT as CONCAT_SOMATIC_STRELKA } from '../../../modules/nf-core/bcftools/concat' +include { BCFTOOLS_MERGE as MERGE_GERMLINE_SOMATIC_VCFS } from '../../../modules/nf-core/bcftools/merge' +include { BCFTOOLS_SORT as SORT_CALLED_CHUNKS } from '../../../modules/nf-core/bcftools/sort' +include { BCFTOOLS_SORT as SORT_FINAL_VCF } from '../../../modules/nf-core/bcftools/sort' +include { TABIX_TABIX as TABIX_GERMLINE } from '../../../modules/nf-core/tabix/tabix' +include { TABIX_TABIX as TABIX_SOMATIC } from '../../../modules/nf-core/tabix/tabix' +include { RBT_VCFSPLIT } from '../../../modules/nf-core/rbt/vcfsplit' +include { VARLOCIRAPTOR_CALLVARIANTS } from '../../../modules/nf-core/varlociraptor/callvariants' +include { VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES as ALIGNMENTPROPERTIES_NORMAL } from '../../../modules/nf-core/varlociraptor/estimatealignmentproperties' +include { VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES as ALIGNMENTPROPERTIES_TUMOR } from '../../../modules/nf-core/varlociraptor/estimatealignmentproperties' +include { VARLOCIRAPTOR_PREPROCESS as PREPROCESS_NORMAL } from '../../../modules/nf-core/varlociraptor/preprocess' +include { VARLOCIRAPTOR_PREPROCESS as PREPROCESS_TUMOR } from '../../../modules/nf-core/varlociraptor/preprocess' +include { YTE as FILL_SCENARIO_FILE } from '../../../modules/nf-core/yte' + +workflow VCF_VARLOCIRAPTOR_SOMATIC { + take: + ch_cram + ch_fasta + ch_fasta_fai + ch_scenario + ch_somatic_vcf + 
ch_germline_vcf + val_num_chunks + + main: + ch_versions = channel.empty() + + meta_map = ch_cram.map { meta, _normal_cram, _normal_crai, _tumor_cram, _tumor_crai -> + meta + [sex_string: (meta.sex == "XX" ? "female" : "male")] + } + + FILL_SCENARIO_FILE( + meta_map.combine(ch_scenario).map { meta, scenario_file -> [meta, scenario_file, [], meta] } + ) + ch_scenario_file = FILL_SCENARIO_FILE.out.rendered + + cram_normal = ch_cram.map { meta, normal_cram, normal_crai, _tumor_cram, _tumor_crai -> [meta + [match_id: meta.normal_id], normal_cram, normal_crai] } + cram_tumor = ch_cram.map { meta, _normal_cram, _normal_crai, tumor_cram, tumor_crai -> [meta + [match_id: meta.normal_id], tumor_cram, tumor_crai] } + + // Estimate alignment properties + ALIGNMENTPROPERTIES_TUMOR( + cram_tumor.combine(ch_fasta).combine(ch_fasta_fai).map { meta_cram, cram, crai, _meta_fasta, fasta, _meta_fai, fai -> + [meta_cram, cram, crai, fasta, fai] + } + ) + + ALIGNMENTPROPERTIES_NORMAL( + cram_normal.combine(ch_fasta).combine(ch_fasta_fai).map { meta_cram, cram, crai, _meta_fasta, fasta, _meta_fai, fai -> + [meta_cram, cram, crai, fasta, fai] + } + ) + + // + // CONCAT SNV AND INDEL VCFS FOR STRELKA + // + TABIX_SOMATIC(ch_somatic_vcf) + ch_versions = ch_versions.mix(TABIX_SOMATIC.out.versions) + ch_somatic_vcf_tbi = ch_somatic_vcf.join(TABIX_SOMATIC.out.tbi, by: [0]) + + // CONCAT SNV / INDEL VCFs COMING FROM STRELKA + ch_somatic_branched = ch_somatic_vcf_tbi.branch { items -> + strelka: items[0].variantcaller == 'strelka' + other: items[0].variantcaller != 'strelka' + } + + // Group somatic strelka SNVs and INDELs by sample for concatenation + ch_somatic_strelka_grouped = ch_somatic_branched.strelka + .map { meta, vcf, tbi -> [[meta.normal_id, meta.patient], meta, vcf, tbi] } + .groupTuple(by: 0) + .map { _key, meta, vcf_list, tbi_list -> + [meta[0], vcf_list, tbi_list] + } + + CONCAT_SOMATIC_STRELKA(ch_somatic_strelka_grouped) + + // Use concatenated Strelka VCFs for somatic and 
germline calling, mix with other variant callers + ch_somatic_vcf_conc = CONCAT_SOMATIC_STRELKA.out.vcf + .join(CONCAT_SOMATIC_STRELKA.out.tbi, by: [0]) + .mix(ch_somatic_branched.other) + + // + // MERGE GERMLINE AND SOMATIC VCFs + // + TABIX_GERMLINE(ch_germline_vcf) + ch_versions = ch_versions.mix(TABIX_GERMLINE.out.versions) + ch_germline_vcf_tbi = ch_germline_vcf.join(TABIX_GERMLINE.out.tbi, by: [0]) + + def somatic_with_key = ch_somatic_vcf_conc.map { meta, vcf, tbi -> + [[id: meta.normal_id, variantcaller: meta.variantcaller], meta, vcf, tbi] + } + + def germline_with_key = ch_germline_vcf_tbi.map { meta, vcf, tbi -> + [[id: meta.id, variantcaller: meta.variantcaller], meta, vcf, tbi] + } + + def matching_pairs = somatic_with_key.join(germline_with_key, failOnMismatch: false) + + // Branch based on whether a matching germline VCF was found + def branched = matching_pairs.branch { items -> + matched: items.size() == 7 + unmatched: items.size() == 4 + } + + MERGE_GERMLINE_SOMATIC_VCFS( + branched.matched.map { _key, meta_somatic, somatic_vcf, somatic_tbi, _meta_germline, germline_vcf, germline_tbi -> + [meta_somatic, [somatic_vcf, germline_vcf], [somatic_tbi, germline_tbi]] + }, + ch_fasta, + ch_fasta_fai, + [[], []], + ) + + ch_versions = ch_versions.mix(MERGE_GERMLINE_SOMATIC_VCFS.out.versions) + + // Combine merged VCFs with unmatched somatic VCFs + ch_vcf = MERGE_GERMLINE_SOMATIC_VCFS.out.vcf.mix( + branched.unmatched.map { _key, meta, vcf, _tbi -> [meta, vcf] } + ) + + // + // CHUNK VCF FILES + // + RBT_VCFSPLIT( + ch_vcf, + val_num_chunks, + ) + + // + // SPLIT VCF CHUNKS - create chunked VCFs for both tumor and normal preprocessing + // + ch_chunked_tumor_vcfs = RBT_VCFSPLIT.out.bcfchunks + .transpose() + .map { meta, vcf_chunked -> + [ + meta + [chunk: vcf_chunked.name.split(/\./)[-2]], + vcf_chunked, + ] + } + + // Create chunked VCFs with normal metadata for normal preprocessing + ch_chunked_normal_vcfs = RBT_VCFSPLIT.out.bcfchunks + .transpose() + 
.map { meta, vcf_chunked -> + [ + meta + [match_id: meta.normal_id, chunk: vcf_chunked.name.split(/\./)[-2]], + vcf_chunked, + ] + } + + // + // PREPROCESS VCF WITH TUMOR CRAM + // + + // Create base channels for data that will be replicated for each chunk + ch_cram_tumor = cram_tumor + .join(ALIGNMENTPROPERTIES_TUMOR.out.alignment_properties_json, failOnMismatch: true, failOnDuplicate: true) + .map { meta, cram, crai, json -> [meta.id, meta, cram, crai, json] } + + ch_input_tumor_preprocess_chunked = ch_chunked_tumor_vcfs + .map { meta, vcf -> [meta.id, meta, vcf] } + .combine(ch_cram_tumor, by: 0) + .combine(ch_fasta) + .combine(ch_fasta_fai) + .map { _id, meta_vcf, vcf, meta_cram, tumor_cram, tumor_crai, alignment_json, _meta_fasta, fasta, _meta_fai, fai -> + [ + meta_cram + [ + variantcaller: meta_vcf.variantcaller, + postprocess: 'varlociraptor', + chunk: meta_vcf.chunk, + ], + tumor_cram, + tumor_crai, + vcf, + alignment_json, + fasta, + fai, + ] + } + + PREPROCESS_TUMOR( + ch_input_tumor_preprocess_chunked + ) + + // + // PREPROCESS VCF WITH NORMAL CRAM + // + + // Create base channels for data that will be replicated for each chunk + ch_cram_alignment = cram_normal + .join(ALIGNMENTPROPERTIES_NORMAL.out.alignment_properties_json, failOnMismatch: true, failOnDuplicate: true) + .map { meta, cram, crai, json -> [meta.match_id, meta, cram, crai, json] } + + ch_input_normal_preprocess_chunked = ch_chunked_normal_vcfs + .map { meta, vcf -> [meta.match_id, meta, vcf] } + .combine(ch_cram_alignment, by: 0) + .combine(ch_fasta) + .combine(ch_fasta_fai) + .map { _match_id, meta_vcf, vcf, meta_cram, normal_cram, normal_crai, alignment_json, _meta_fasta, fasta, _meta_fai, fai -> + [ + meta_vcf + [ + id: meta_cram.id, + postprocess: 'varlociraptor', + ], + normal_cram, + normal_crai, + vcf, + alignment_json, + fasta, + fai, + ] + } + + PREPROCESS_NORMAL( + ch_input_normal_preprocess_chunked + ) + + // + // CALL VARIANTS WITH VARLOCIRAPTOR + // + ch_normal_for_join = 
PREPROCESS_NORMAL.out.bcf + .map { meta, normal_bcf -> [[meta.patient, meta.match_id, meta.chunk, meta.variantcaller], meta, normal_bcf] } + + ch_tumor_for_join = PREPROCESS_TUMOR.out.bcf + .map { meta, tumor_bcf -> [[meta.patient, meta.normal_id, meta.chunk, meta.variantcaller], meta, tumor_bcf] } + + + ch_normal_tumor_for_join = ch_normal_for_join + .join( + ch_tumor_for_join, + by: [0], + failOnMismatch: true, + failOnDuplicate: true, + ) + .map { _id, meta_normal, normal_bcf, _meta_tumor, tumor_bcf -> + [meta_normal, [normal_bcf, tumor_bcf]] + } + + ch_vcf_for_callvariants = ch_normal_tumor_for_join + .map { meta, bcfs -> + [meta.id, meta, bcfs] + } + .combine( + ch_scenario_file.map { meta, scenario_file -> [meta.id, scenario_file] }, + by: 0 + ) + .map { _id, meta, bcfs, scenario_file -> + [meta, bcfs, scenario_file, ["normal", "tumor"]] + } + + VARLOCIRAPTOR_CALLVARIANTS( + ch_vcf_for_callvariants + ) + + // + // SORT AND MERGE CALLED VARIANTS + // + SORT_CALLED_CHUNKS( + VARLOCIRAPTOR_CALLVARIANTS.out.bcf + ) + ch_versions = ch_versions.mix(SORT_CALLED_CHUNKS.out.versions) + + ch_sort_called_chunks_vcf = SORT_CALLED_CHUNKS.out.vcf.branch { + single: val_num_chunks <= 1 + multiple: val_num_chunks > 1 + } + + ch_sort_called_chunks_tbi = SORT_CALLED_CHUNKS.out.tbi.branch { + single: val_num_chunks <= 1 + multiple: val_num_chunks > 1 + } + + ch_vcf_tbi_chunks = ch_sort_called_chunks_vcf.multiple + .join(ch_sort_called_chunks_tbi.multiple, by: 0, failOnMismatch: true, failOnDuplicate: true) + .map { meta, vcf, tbi -> + [meta - meta.subMap("chunk"), vcf, tbi] + } + .groupTuple(size: val_num_chunks) + + CONCAT_CALLED_CHUNKS(ch_vcf_tbi_chunks) + + ch_versions = ch_versions.mix(CONCAT_CALLED_CHUNKS.out.versions) + + ch_final_vcf = ch_sort_called_chunks_vcf.single.mix(CONCAT_CALLED_CHUNKS.out.vcf) + + SORT_FINAL_VCF(ch_final_vcf) + + ch_versions = ch_versions.mix(SORT_FINAL_VCF.out.versions) + + emit: + vcf = SORT_FINAL_VCF.out.vcf + tbi = SORT_FINAL_VCF.out.tbi + 
versions = ch_versions +} diff --git a/subworkflows/nf-core/annotation_ensemblvep/main.nf b/subworkflows/nf-core/annotation_ensemblvep/main.nf deleted file mode 100644 index d8f21f10aa..0000000000 --- a/subworkflows/nf-core/annotation_ensemblvep/main.nf +++ /dev/null @@ -1,26 +0,0 @@ -// -// Run VEP to annotate VCF files -// - -include { ENSEMBLVEP } from '../../../modules/nf-core/modules/ensemblvep/main' -include { TABIX_BGZIPTABIX as ANNOTATION_BGZIPTABIX } from '../../../modules/nf-core/modules/tabix/bgziptabix/main' - -workflow ANNOTATION_ENSEMBLVEP { - take: - vcf // channel: [ val(meta), vcf ] - vep_genome // value: which genome - vep_species // value: which species - vep_cache_version // value: which cache version - vep_cache // path: path_to_vep_cache (optionnal) - - main: - ENSEMBLVEP(vcf, vep_genome, vep_species, vep_cache_version, vep_cache) - ANNOTATION_BGZIPTABIX(ENSEMBLVEP.out.vcf) - - ch_versions = ENSEMBLVEP.out.versions.first().mix(ANNOTATION_BGZIPTABIX.out.versions.first()) - - emit: - vcf_tbi = ANNOTATION_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] - reports = ENSEMBLVEP.out.report // path: *.html - versions = ch_versions // path: versions.yml -} diff --git a/subworkflows/nf-core/annotation_ensemblvep/meta.yml b/subworkflows/nf-core/annotation_ensemblvep/meta.yml deleted file mode 100644 index e7d92ce9cd..0000000000 --- a/subworkflows/nf-core/annotation_ensemblvep/meta.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: annotation_ensemblvep -description: | - Perform annotation with ensemblvep and bgzip + tabix index the resulting VCF file -keywords: - - ensemblvep -modules: - - ensemblvep - - tabix/bgziptabix -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test' ] - - input: - type: vcf - description: list containing one vcf file - pattern: "[ *.{vcf,vcf.gz} ]" -output: - - versions: - type: file - description: File containing software versions - pattern: 'versions.yml' - - vcf_tbi: - type: file - description: Compressed vcf file + tabix index - pattern: "[ *{.vcf.gz,vcf.gz.tbi} ]" -authors: - - '@maxulysse' diff --git a/subworkflows/nf-core/annotation_snpeff/main.nf b/subworkflows/nf-core/annotation_snpeff/main.nf deleted file mode 100644 index d2625c1d9c..0000000000 --- a/subworkflows/nf-core/annotation_snpeff/main.nf +++ /dev/null @@ -1,23 +0,0 @@ -// -// Run SNPEFF to annotate VCF files -// - -include { SNPEFF } from '../../../modules/nf-core/modules/snpeff/main' -include { TABIX_BGZIPTABIX as ANNOTATION_BGZIPTABIX } from '../../../modules/nf-core/modules/tabix/bgziptabix/main' - -workflow ANNOTATION_SNPEFF { - take: - vcf // channel: [ val(meta), vcf ] - snpeff_db // value: version of db to use - snpeff_cache // path: path_to_snpeff_cache (optionnal) - - main: - SNPEFF(vcf, snpeff_db, snpeff_cache) - ANNOTATION_BGZIPTABIX(SNPEFF.out.vcf) - ch_versions = SNPEFF.out.versions.first().mix(ANNOTATION_BGZIPTABIX.out.versions.first()) - - emit: - vcf_tbi = ANNOTATION_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] - reports = SNPEFF.out.report // path: *.html - versions = ch_versions // path: versions.yml -} diff --git a/subworkflows/nf-core/annotation_snpeff/meta.yml b/subworkflows/nf-core/annotation_snpeff/meta.yml deleted file mode 100644 index 164a0ee27a..0000000000 --- a/subworkflows/nf-core/annotation_snpeff/meta.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: annotation_snpeff -description: | - Perform annotation with snpeff and bgzip + tabix index the resulting VCF file -keywords: - - snpeff -modules: - - snpeff - - tabix/bgziptabix -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test' ] - - input: - type: vcf - description: list containing one vcf file - pattern: "[ *.{vcf,vcf.gz} ]" -output: - - versions: - type: file - description: File containing software versions - pattern: 'versions.yml' - - vcf_tbi: - type: file - description: Compressed vcf file + tabix index - pattern: "[ *{.vcf.gz,vcf.gz.tbi} ]" -authors: - - '@maxulysse' diff --git a/subworkflows/nf-core/bam_ngscheckmate/main.nf b/subworkflows/nf-core/bam_ngscheckmate/main.nf new file mode 100644 index 0000000000..d698dd3f24 --- /dev/null +++ b/subworkflows/nf-core/bam_ngscheckmate/main.nf @@ -0,0 +1,47 @@ +include { BCFTOOLS_MPILEUP } from '../../../modules/nf-core/bcftools/mpileup/main' +include { NGSCHECKMATE_NCM } from '../../../modules/nf-core/ngscheckmate/ncm/main' + +workflow BAM_NGSCHECKMATE { + + take: + ch_input // channel: [ val(meta1), bam/cram ] + ch_snp_bed // channel: [ val(meta2), bed ] + ch_fasta // channel: [ val(meta3), fasta ] + + main: + + ch_versions = Channel.empty() + ch_input_bed = ch_input.combine(ch_snp_bed) + // do something to combine the metas? + .map{ input_meta, input_file, _bed_meta, bed_file -> + [input_meta, input_file, bed_file] + } + + BCFTOOLS_MPILEUP (ch_input_bed, ch_fasta.collect(), false) + ch_versions = ch_versions.mix(BCFTOOLS_MPILEUP.out.versions) + + BCFTOOLS_MPILEUP + .out + .vcf + .map{_meta, vcf -> vcf} // discard individual metas + .collect() // group into one channel + .map{files -> [files]} // make the channel into [vcf1, vcf2, ...] + .set {ch_collected_vcfs} + + ch_snp_bed + .map{meta, _bed -> meta} // use the snp_bed file meta as the meta for the merged channel + .combine(ch_collected_vcfs) // add the vcf files after the meta, now looks like [meta, [vcf1, vcf2, ... 
] ] + .set {ch_vcfs} + + NGSCHECKMATE_NCM (ch_vcfs, ch_snp_bed, ch_fasta) + ch_versions = ch_versions.mix(NGSCHECKMATE_NCM.out.versions) + + emit: + corr_matrix = NGSCHECKMATE_NCM.out.corr_matrix // channel: [ meta, corr_matrix ] + matched = NGSCHECKMATE_NCM.out.matched // channel: [ meta, matched ] + all = NGSCHECKMATE_NCM.out.all // channel: [ meta, all ] + vcf = BCFTOOLS_MPILEUP.out.vcf // channel: [ meta, vcf ] + pdf = NGSCHECKMATE_NCM.out.pdf // channel: [ meta, pdf ] + versions = ch_versions // channel: [ versions.yml ] + +} diff --git a/subworkflows/nf-core/bam_ngscheckmate/meta.yml b/subworkflows/nf-core/bam_ngscheckmate/meta.yml new file mode 100644 index 0000000000..7de0a114d4 --- /dev/null +++ b/subworkflows/nf-core/bam_ngscheckmate/meta.yml @@ -0,0 +1,68 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "bam_ngscheckmate" +description: Take a set of bam files and run NGSCheckMate to determine whether samples match with each other, using a set of SNPs. +keywords: + - ngscheckmate + - qc + - bam + - snp +components: + - bcftools/mpileup + - ngscheckmate/ncm +input: + - meta1: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - bam: + type: file + description: BAM files for each sample + pattern: "*.{bam}" + - meta2: + type: map + description: | + Groovy Map containing bed file information + e.g. [ id:'sarscov2' ] + - snp_bed: + type: file + description: BED file containing the SNPs to analyse. NGSCheckMate provides some default ones for hg19/hg38. + pattern: "*.{bed}" + - meta3: + type: map + description: | + Groovy Map containing reference genome meta information + e.g. 
[ id:'sarscov2' ] + - fasta: + type: file + description: fasta file for the genome + pattern: "*.{fasta}" +output: + - pdf: + type: file + description: A pdf containing a dendrogram showing how the samples match up + pattern: "*.{pdf}" + - corr_matrix: + type: file + description: A text file containing the correlation matrix between each sample + pattern: "*corr_matrix.txt" + - matched: + type: file + description: A txt file containing only the samples that match with each other + pattern: "*matched.txt" + - all: + type: file + description: A txt file containing all the sample comparisons, whether they match or not + pattern: "*all.txt" + - vcf: + type: file + description: vcf files for each sample giving the SNP calls + pattern: "*.vcf" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@SPPearce" +maintainers: + - "@SPPearce" diff --git a/subworkflows/nf-core/bam_ngscheckmate/nextflow.config b/subworkflows/nf-core/bam_ngscheckmate/nextflow.config new file mode 100644 index 0000000000..cad9f57cc7 --- /dev/null +++ b/subworkflows/nf-core/bam_ngscheckmate/nextflow.config @@ -0,0 +1,13 @@ +// IMPORTANT: Add this configuration to your modules.config + +process { + withName: ".*BAM_NGSCHECKMATE:BCFTOOLS_MPILEUP" { + ext.args2 = '--no-version --ploidy 1 -c' + ext.args3 = '--no-version' + } + + withName: ".*BAM_NGSCHECKMATE:NGSCHECKMATE_NCM" { + ext.args = '-V' + } + +} diff --git a/subworkflows/nf-core/fastqc_trimgalore.nf b/subworkflows/nf-core/fastqc_trimgalore.nf deleted file mode 100644 index af31e3ed7a..0000000000 --- a/subworkflows/nf-core/fastqc_trimgalore.nf +++ /dev/null @@ -1,49 +0,0 @@ -// -// Read QC and trimming -// - -include { FASTQC } from '../../modules/nf-core/modules/fastqc/main' -include { TRIMGALORE } from '../../modules/nf-core/modules/trimgalore/main' - -workflow FASTQC_TRIMGALORE { - take: - reads // channel: [ val(meta), [ reads ] ] - skip_fastqc // boolean: true/false - 
skip_trimming // boolean: true/false - - main: - ch_versions = Channel.empty() - fastqc_html = Channel.empty() - fastqc_zip = Channel.empty() - - if (!skip_fastqc) { - FASTQC ( reads ).html.set { fastqc_html } - fastqc_zip = FASTQC.out.zip - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - } - - trim_reads = reads - trim_html = Channel.empty() - trim_zip = Channel.empty() - trim_log = Channel.empty() - - if (!skip_trimming) { - TRIMGALORE ( reads ).reads.set { trim_reads } - trim_html = TRIMGALORE.out.html - trim_zip = TRIMGALORE.out.zip - trim_log = TRIMGALORE.out.log - ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first()) - } - - emit: - reads = trim_reads // channel: [ val(meta), [ reads ] ] - - fastqc_html // channel: [ val(meta), [ html ] ] - fastqc_zip // channel: [ val(meta), [ zip ] ] - - trim_html // channel: [ val(meta), [ html ] ] - trim_zip // channel: [ val(meta), [ zip ] ] - trim_log // channel: [ val(meta), [ txt ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/fgbio_create_umi_consensus/main.nf b/subworkflows/nf-core/fgbio_create_umi_consensus/main.nf deleted file mode 100644 index f8c2eaad61..0000000000 --- a/subworkflows/nf-core/fgbio_create_umi_consensus/main.nf +++ /dev/null @@ -1,77 +0,0 @@ -// -// Runs FGBIO tools to remove UMI tags from FASTQ reads -// Convert them to unmapped BAM file, map them to the reference genome, -// use the mapped information to group UMIs and generate consensus reads -// - - -include { BWAMEM2_MEM } from '../../../modules/nf-core/modules/bwamem2/mem/main' -include { BWA_MEM as BWAMEM1_MEM } from '../../../modules/nf-core/modules/bwa/mem/main' -include { FGBIO_CALLMOLECULARCONSENSUSREADS as CALLUMICONSENSUS } from '../../../modules/nf-core/modules/fgbio/callmolecularconsensusreads/main.nf' -include { FGBIO_FASTQTOBAM as FASTQTOBAM } from '../../../modules/nf-core/modules/fgbio/fastqtobam/main' -include { FGBIO_GROUPREADSBYUMI as GROUPREADSBYUMI } 
from '../../../modules/nf-core/modules/fgbio/groupreadsbyumi/main' -include { SAMBLASTER } from '../../../modules/nf-core/modules/samblaster/main' -include { SAMTOOLS_BAM2FQ as BAM2FASTQ } from '../../../modules/nf-core/modules/samtools/bam2fq/main.nf' - -workflow CREATE_UMI_CONSENSUS { - take: - reads // channel: [mandatory] [ val(meta), [ reads ] ] - fasta // channel: [mandatory] /path/to/reference/fasta - bwa // channel: [mandatory] Pre-computed BWA index (either bwa-mem or bwa-mem2; MUST be matching to chosen aligner) - read_structure // string: [mandatory] "read_structure" - groupreadsbyumi_strategy // string: [mandatory] grouping strategy - default: "Adjacency" - aligner // string: [mandatory] "bwa-mem" or "bwa-mem2" - - main: - ch_versions = Channel.empty() - - // using information in val(read_structure) FASTQ reads are converted into - // a tagged unmapped BAM file (uBAM) - FASTQTOBAM ( reads, read_structure ) - ch_versions = ch_versions.mix(FASTQTOBAM.out.versions) - - // in order to map uBAM using BWA MEM, we need to convert uBAM to FASTQ - // but keep the appropriate UMI tags in the FASTQ comment field and produce - // an interleaved FASQT file (hence, split = false) - split = false - BAM2FASTQ ( FASTQTOBAM.out.umibam, split ) - ch_versions = ch_versions.mix(BAM2FASTQ.out.versions) - - // the user can choose here to use either bwa-mem (default) or bwa-mem2 - aligned_bam = Channel.empty() - - if (aligner == "bwa-mem") { - - // appropriately tagged interleaved FASTQ reads are mapped to the reference - BWAMEM1_MEM ( BAM2FASTQ.out.reads, bwa, false ) - ch_versions = ch_versions.mix(BWAMEM1_MEM.out.versions) - aligned_bam = BWAMEM1_MEM.out.bam - } else { - - // appropriately tagged interleaved FASTQ reads are mapped to the reference - BWAMEM2_MEM ( BAM2FASTQ.out.reads, bwa, false ) - ch_versions = ch_versions.mix(BWAMEM2_MEM.out.versions) - aligned_bam = BWAMEM2_MEM.out.bam - } - - // samblaster is used in order to tag mates information in the BAM file - // 
this is used in order to group reads by UMI - SAMBLASTER ( aligned_bam ) - ch_versions = ch_versions.mix(SAMBLASTER.out.versions) - - // appropriately tagged reads are now grouped by UMI information - GROUPREADSBYUMI ( SAMBLASTER.out.bam, groupreadsbyumi_strategy ) - ch_versions = ch_versions.mix(GROUPREADSBYUMI.out.versions) - - // using the above created groups, a consensus across reads in the same grou - // can be called - // this will emit a consensus BAM file - CALLUMICONSENSUS ( GROUPREADSBYUMI.out.bam ) - ch_versions = ch_versions.mix(CALLUMICONSENSUS.out.versions) - - emit: - umibam = FASTQTOBAM.out.umibam // channel: [ val(meta), [ bam ] ] - groupbam = GROUPREADSBYUMI.out.bam // channel: [ val(meta), [ bam ] ] - consensusbam = CALLUMICONSENSUS.out.bam // channel: [ val(meta), [ bam ] ] - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/gatk4_mapping/main.nf b/subworkflows/nf-core/gatk4_mapping/main.nf deleted file mode 100644 index 075d171ff7..0000000000 --- a/subworkflows/nf-core/gatk4_mapping/main.nf +++ /dev/null @@ -1,110 +0,0 @@ -// -// MAPPING -// - -include { BWAMEM2_MEM } from '../../../modules/nf-core/modules/bwamem2/mem/main' -include { BWA_MEM as BWAMEM1_MEM } from '../../../modules/nf-core/modules/bwa/mem/main' -include { SAMTOOLS_INDEX as INDEX_MAPPING } from '../../../modules/local/samtools/index/main' -include { SAMTOOLS_MERGE } from '../../../modules/nf-core/modules/samtools/merge/main' -include { SEQKIT_SPLIT2 } from '../../../modules/nf-core/modules/seqkit/split2/main' - -workflow GATK4_MAPPING { - take: - aligner // string: [mandatory] "bwa-mem" or "bwa-mem2" - bwa // channel: [mandatory] bwa - fasta // channel: [mandatory] fasta - fasta_fai // channel: [mandatory] fasta_fai - reads_input // channel: [mandatory] meta, reads_input - skip_markduplicates // boolean: true/false - save_bam_mapped // boolean: true/false - - main: - - bam_indexed = Channel.empty() - - if (params.split_fastq > 1) { - 
reads_input_split = SEQKIT_SPLIT2(reads_input).reads.map{ key, reads -> - //TODO maybe this can be replaced by a regex to include part_001 etc. - - //sorts list of split fq files by : - //[R1.part_001, R2.part_001, R1.part_002, R2.part_002,R1.part_003, R2.part_003,...] - //TODO: determine whether it is possible to have an uneven number of parts, so remainder: true woud need to be used, I guess this could be possible for unfiltered reads, reads that don't have pairs etc. - read_files = reads.sort{ a,b -> a.getName().tokenize('.')[ a.getName().tokenize('.').size() - 3] <=> b.getName().tokenize('.')[ b.getName().tokenize('.').size() - 3]}.collate(2) - key.size = read_files.size() - [key, read_files] - }.transpose() - } else { - reads_input_split = reads_input.map{ meta, reads -> - meta.size = 1 - [meta, reads] - } - } - - - - bam_bwamem1 = Channel.empty() - bam_bwamem2 = Channel.empty() - bam_from_aligner = Channel.empty() - tool_versions = Channel.empty() - - if (aligner == "bwa-mem") { - BWAMEM1_MEM(reads_input_split, bwa, true) - - bam_bwamem1 = BWAMEM1_MEM.out.bam - - bwamem1_version = BWAMEM1_MEM.out.versions.first() - - tool_versions = tool_versions.mix(bwamem1_version) - } else { - BWAMEM2_MEM(reads_input_split, bwa, true) - - bam_bwamem2 = BWAMEM2_MEM.out.bam - - bwamem2_version = BWAMEM2_MEM.out.versions.first() - - tool_versions = tool_versions.mix(bwamem2_version) - } - - bam_from_aligner = bam_from_aligner.mix(bam_bwamem1) - bam_from_aligner = bam_from_aligner.mix(bam_bwamem2) - - bam_from_aligner.map{ meta, bam -> - new_meta = meta.clone() - new_meta.remove('read_group') - new_meta.remove('size') - new_meta.id = meta.sample - - // groupKey is to makes sure that the correct group can advance as soon as it is complete - // and not stall the workflow until all pieces are mapped - def groupKey = groupKey(meta, meta.numLanes * meta.size) - - //Returns the values we need - tuple(groupKey, new_meta, bam) - }.groupTuple(by:[0,1]).map{ - groupKey, new_meta, bam -> 
- [new_meta, bam] - }.set{bam_mapped} - - // GATK markduplicates can handle multiple BAMS as input - // So no merging/indexing at this step - // Except if and only if skipping markduplicates - // Or saving mapped BAMs - - if (save_bam_mapped || skip_markduplicates) { - bam_mapped.branch{ - single: it[1].size() == 1 - multiple: it[1].size() > 1 - }.set{bam_to_merge} - - SAMTOOLS_MERGE(bam_to_merge.multiple, []) - bam_merged = bam_to_merge.single.mix(SAMTOOLS_MERGE.out.bam) - - INDEX_MAPPING(bam_merged) - bam_indexed = INDEX_MAPPING.out.bam_bai - } - - emit: - bam = bam_mapped - bam_indexed = bam_indexed - versions = tool_versions -} diff --git a/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/main.nf deleted file mode 100644 index 6866a6d707..0000000000 --- a/subworkflows/nf-core/gatk_tumor_normal_somatic_variant_calling/main.nf +++ /dev/null @@ -1,171 +0,0 @@ -// -// Run GATK mutect2 in tumor normal mode, getepileupsummaries, calculatecontamination, learnreadorientationmodel and filtermutectcalls -// - -include { BGZIP as BGZIP_MUTECT2 } from '../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_VCF_MUTECT2 } from '../../../modules/local/concat_vcf/main' - -include { GATK4_MUTECT2 as MUTECT2 } from '../../../modules/nf-core/modules/gatk4/mutect2/main' -include { GATK4_MERGEMUTECTSTATS as MERGEMUTECTSTATS } from '../../../modules/local/gatk4/mergemutectstats' -include { GATK4_LEARNREADORIENTATIONMODEL as LEARNREADORIENTATIONMODEL } from '../../../modules/nf-core/modules/gatk4/learnreadorientationmodel/main' -include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES_TUMOR } from '../../../modules/local/gatk4/gatherpileupsummaries' -include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES_NORMAL} from '../../../modules/local/gatk4/gatherpileupsummaries' -include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_TUMOR } from 
'../../../modules/nf-core/modules/gatk4/getpileupsummaries/main' -include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_NORMAL } from '../../../modules/nf-core/modules/gatk4/getpileupsummaries/main' -include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../modules/nf-core/modules/gatk4/calculatecontamination/main' -include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../modules/nf-core/modules/gatk4/filtermutectcalls/main' - -workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { - take: - input // channel: [ val(meta), [ input ], [ input_index ], [which_norm] ] - fasta // channel: /path/to/reference/fasta - fai // channel: /path/to/reference/fasta/index - dict // channel: /path/to/reference/fasta/dictionary - germline_resource // channel: /path/to/germline/resource - germline_resource_tbi // channel: /path/to/germline/index - panel_of_normals // channel: /path/to/panel/of/normals - panel_of_normals_tbi // channel: /path/to/panel/of/normals/index - no_intervals - num_intervals - intervals_bed_combine_gz - - main: - ch_versions = Channel.empty() - - // - //Perform variant calling using mutect2 module in tumor single mode. - // - MUTECT2 ( input, false, false, false, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi ) - ch_versions = ch_versions.mix(MUTECT2.out.versions) - - // - //Generate pileup summary tables using getepileupsummaries. tumor sample should always be passed in as the first input and input list entries of ch_mutect2_in, - //to ensure correct file order for calculatecontamination. - - pileup_tumor_input = input.map { - meta, input_list, input_index_list, intervals, which_norm -> - tumor_id = meta.tumor_id - id = intervals ? 
tumor_id + "_" + intervals.baseName : tumor_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], input_list[1], input_index_list[1], intervals] - } - - pileup_normal_input = input.map { - meta, input_list, input_index_list, intervals, which_norm -> - normal_id = meta.normal_id - id = intervals ? normal_id + "_" + intervals.baseName : normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], input_list[0], input_index_list[0], intervals] - } - GETPILEUPSUMMARIES_TUMOR ( pileup_tumor_input, fasta, fai, dict, germline_resource, germline_resource_tbi ) - GETPILEUPSUMMARIES_NORMAL ( pileup_normal_input, fasta, fai, dict, germline_resource, germline_resource_tbi ) - ch_versions = ch_versions.mix(GETPILEUPSUMMARIES_NORMAL.out.versions) - - if(no_intervals){ - mutect2_vcf_gz_tbi = MUTECT2.out.vcf.join(MUTECT2.out.tbi) - mutect2_stats = MUTECT2.out.stats - pileup_table_tumor= GETPILEUPSUMMARIES_TUMOR.out.table - pileup_table_normal= GETPILEUPSUMMARIES_NORMAL.out.table - - }else{ - - //Merge Mutect2 VCF - BGZIP_MUTECT2(MUTECT2.out.vcf) - - BGZIP_MUTECT2.out.vcf.map{ meta, vcf -> - id = meta.tumor_id + "_vs_" + meta.normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], vcf] - }.set{bgzip_mutect2} - - mutect2_vcf_to_concat = bgzip_mutect2.groupTuple(size: num_intervals) - - CONCAT_VCF_MUTECT2(mutect2_vcf_to_concat, fai, intervals_bed_combine_gz) - mutect2_vcf_gz_tbi = CONCAT_VCF_MUTECT2.out.vcf.join(CONCAT_VCF_MUTECT2.out.tbi) - - ch_versions = ch_versions.mix(BGZIP_MUTECT2.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_MUTECT2.out.versions) - - //Merge Muteect2 Stats - MUTECT2.out.stats.map{ meta, stats -> - id = meta.tumor_id + "_vs_" + meta.normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], stats] - 
}.groupTuple(size: num_intervals).set{mutect2_stats_to_merge} - - MERGEMUTECTSTATS(mutect2_stats_to_merge) - mutect2_stats = MERGEMUTECTSTATS.out.stats - ch_versions = ch_versions.mix(MERGEMUTECTSTATS.out.versions) - - //Merge Pileup Summaries - pileup_tumor_tables_to_gather = GETPILEUPSUMMARIES_TUMOR.out.table.map{ meta, table -> - [[id: meta.tumor_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], table] - }.groupTuple(size: num_intervals) - - GATHERPILEUPSUMMARIES_TUMOR(pileup_tumor_tables_to_gather, dict) - GATHERPILEUPSUMMARIES_TUMOR.out.table.map{ meta, table -> - id = meta.tumor_id + "_vs_" + meta.normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], table] - }.set{pileup_table_tumor} - - pileup_normal_tables_to_gather = GETPILEUPSUMMARIES_NORMAL.out.table.map{ meta, table -> - [[id: meta.normal_id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], table] - }.groupTuple(size: num_intervals) - - GATHERPILEUPSUMMARIES_NORMAL(pileup_normal_tables_to_gather, dict) - GATHERPILEUPSUMMARIES_NORMAL.out.table.map{ meta, table -> - id = meta.tumor_id + "_vs_" + meta.normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], table] - }.set{pileup_table_normal} - - } - - // - //Generate artifactpriors using learnreadorientationmodel on the f1r2 output of mutect2. 
- // - MUTECT2.out.f1r2.map{ meta, f1f2 -> - id = meta.tumor_id + "_vs_" + meta.normal_id - [[id: id, normal_id: meta.normal_id, tumor_id: meta.tumor_id, gender: meta.gender, patient: meta.patient ], f1f2] - }.groupTuple(size: num_intervals) - .set{ch_learnread_in} - - LEARNREADORIENTATIONMODEL (ch_learnread_in) - - ch_versions = ch_versions.mix(LEARNREADORIENTATIONMODEL.out.versions) - - // - //Contamination and segmentation tables created using calculatecontamination on the pileup summary table. - // - ch_calccon_in = pileup_table_tumor.join(pileup_table_normal) - CALCULATECONTAMINATION ( ch_calccon_in, true ) - ch_versions = ch_versions.mix(CALCULATECONTAMINATION.out.versions) - - // - //Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables. - // - ch_filtermutect = mutect2_vcf_gz_tbi.join(mutect2_stats) - .join(LEARNREADORIENTATIONMODEL.out.artifactprior) - .join(CALCULATECONTAMINATION.out.segmentation) - .join(CALCULATECONTAMINATION.out.contamination) - ch_filtermutect.map{ meta, vcf, tbi, stats, orientation, seg, cont -> - [meta, vcf, tbi, stats, orientation, seg, cont, []] - }.set{ch_filtermutect_in} - - FILTERMUTECTCALLS ( ch_filtermutect_in, fasta, fai, dict ) - ch_versions = ch_versions.mix(FILTERMUTECTCALLS.out.versions) - - emit: - mutect2_vcf_gz_tbi = mutect2_vcf_gz_tbi // channel: [ val(meta), [ vcf ] ] - mutect2_stats = mutect2_stats // channel: [ val(meta), [ stats ] ] - //mutect2_f1r2 = MUTECT2.out.f1r2 // channel: [ val(meta), [ f1r2 ] ] - - artifact_priors = LEARNREADORIENTATIONMODEL.out.artifactprior // channel: [ val(meta), [ artifactprior ] ] - - pileup_table_tumor = pileup_table_tumor // channel: [ val(meta), [ table_tumor ] ] - pileup_table_normal = pileup_table_normal // channel: [ val(meta), [ table_normal ] ] - - contamination_table = CALCULATECONTAMINATION.out.contamination // channel: [ val(meta), [ contamination ] ] - segmentation_table = CALCULATECONTAMINATION.out.segmentation // 
channel: [ val(meta), [ segmentation ] ] - - filtered_vcf = FILTERMUTECTCALLS.out.vcf // channel: [ val(meta), [ vcf ] ] - filtered_tbi = FILTERMUTECTCALLS.out.tbi // channel: [ val(meta), [ tbi ] ] - filtered_stats = FILTERMUTECTCALLS.out.stats // channel: [ val(meta), [ stats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/gatk_tumor_only_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk_tumor_only_somatic_variant_calling/main.nf deleted file mode 100644 index 98c0f46d1d..0000000000 --- a/subworkflows/nf-core/gatk_tumor_only_somatic_variant_calling/main.nf +++ /dev/null @@ -1,127 +0,0 @@ -// -// Run GATK mutect2 in tumor only mode, getepileupsummaries, calculatecontamination and filtermutectcalls -// - -include { BGZIP as BGZIP_MUTECT2 } from '../../../modules/local/bgzip' -include { CONCAT_VCF as CONCAT_VCF_MUTECT2 } from '../../../modules/local/concat_vcf/main' - -include { GATK4_MUTECT2 as MUTECT2 } from '../../../modules/nf-core/modules/gatk4/mutect2/main' -include { GATK4_MERGEMUTECTSTATS as MERGEMUTECTSTATS } from '../../../modules/local/gatk4/mergemutectstats' -include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES } from '../../../modules/nf-core/modules/gatk4/getpileupsummaries/main' -include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES } from '../../../modules/local/gatk4/gatherpileupsummaries' -include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../modules/nf-core/modules/gatk4/calculatecontamination/main' -include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../modules/nf-core/modules/gatk4/filtermutectcalls/main' - -workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { - take: - input // channel: [ val(meta), [ input ], [ input_index ], [intervals], [] ] - fasta // channel: /path/to/reference/fasta - fai // channel: /path/to/reference/fasta/index - dict // channel: /path/to/reference/fasta/dictionary - germline_resource // channel: 
/path/to/germline/resource - germline_resource_tbi // channel: /path/to/germline/index - panel_of_normals // channel: /path/to/panel/of/normals - panel_of_normals_tbi // channel: /path/to/panel/of/normals/index - num_intervals - no_intervals - intervals_bed_combine_gz - - - main: - ch_versions = Channel.empty() - - // - //Perform variant calling using mutect2 module in tumor single mode. - // - mutect2_vcf_gz_tbi = Channel.empty() - MUTECT2 ( input , true , false , false , fasta , fai , dict , germline_resource , germline_resource_tbi , panel_of_normals , panel_of_normals_tbi ) - ch_versions = ch_versions.mix(MUTECT2.out.versions) - - // - //Generate pileup summary table using getepileupsummaries. - // - pileup_input = input.map { - meta, input_file, input_index, intervals, which_norm -> - [meta, input_file, input_index, intervals] - } - GETPILEUPSUMMARIES ( pileup_input , fasta, fai, dict, germline_resource , germline_resource_tbi ) - ch_versions = ch_versions.mix(GETPILEUPSUMMARIES.out.versions) - - if(no_intervals){ - mutect2_vcf_gz_tbi = MUTECT2.out.vcf.join(MUTECT2.out.tbi) - mutect2_stats = MUTECT2.out.stats - pileup_table = GETPILEUPSUMMARIES.out.table - }else{ - - //Merge Mutect2 VCF - BGZIP_MUTECT2(MUTECT2.out.vcf) - BGZIP_MUTECT2.out.vcf.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.set{bgzip_mutect2} - - mutect2_vcf_to_concat = bgzip_mutect2.groupTuple(size: num_intervals) - - CONCAT_VCF_MUTECT2(mutect2_vcf_to_concat, fai, intervals_bed_combine_gz) - mutect2_vcf_gz_tbi = CONCAT_VCF_MUTECT2.out.vcf.join(CONCAT_VCF_MUTECT2.out.tbi) - - ch_versions = ch_versions.mix(BGZIP_MUTECT2.out.versions) - ch_versions = ch_versions.mix(CONCAT_VCF_MUTECT2.out.versions) - - //Merge Muteect2 Stats - MUTECT2.out.stats.map{ meta, stats -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, stats] - }.groupTuple(size: num_intervals).set{mutect2_stats_to_merge} - - MERGEMUTECTSTATS(mutect2_stats_to_merge) 
- mutect2_stats = MERGEMUTECTSTATS.out.stats - ch_versions = ch_versions.mix(MERGEMUTECTSTATS.out.versions) - - //Merge Pileup Summaries - pileup_tables_to_gather = GETPILEUPSUMMARIES.out.table.map{ meta, table -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, table] - }.groupTuple(size: num_intervals) - - GATHERPILEUPSUMMARIES(pileup_tables_to_gather, dict) - pileup_table = GATHERPILEUPSUMMARIES.out.table - - } - - // - //Contamination and segmentation tables created using calculatecontamination on the pileup summary table. - // - pileup_table.map{meta, table -> [meta, table, []]}.set{table_contamination} - CALCULATECONTAMINATION ( table_contamination, true ) - ch_versions = ch_versions.mix(CALCULATECONTAMINATION.out.versions) - - - // - //Mutect2 calls filtered by filtermutectcalls using the contamination and segmentation tables. - // - ch_filtermutect = mutect2_vcf_gz_tbi.join(mutect2_stats) - .join(CALCULATECONTAMINATION.out.segmentation) - .join(CALCULATECONTAMINATION.out.contamination) - ch_filtermutect_in = ch_filtermutect.map{ meta, vcf, tbi, stats, seg, cont -> [meta, vcf, tbi, stats, [], seg, cont, []] } - FILTERMUTECTCALLS ( ch_filtermutect_in, fasta, fai, dict ) - ch_versions = ch_versions.mix(FILTERMUTECTCALLS.out.versions) - - emit: - mutect2_vcf_gz_tbi = mutect2_vcf_gz_tbi // channel: [ val(meta), [ vcf ] ] - mutect2_stats = MUTECT2.out.stats // channel: [ val(meta), [ stats ] ] - - pileup_table = pileup_table // channel: [ val(meta), [ table ] ] - - contamination_table = CALCULATECONTAMINATION.out.contamination // channel: [ val(meta), [ contamination ] ] - segmentation_table = CALCULATECONTAMINATION.out.segmentation // channel: [ val(meta), [ segmentation ] ] - - filtered_vcf = FILTERMUTECTCALLS.out.vcf // channel: [ val(meta), [ vcf ] ] - filtered_index = FILTERMUTECTCALLS.out.tbi // channel: [ val(meta), [ tbi ] ] - filtered_stats = FILTERMUTECTCALLS.out.stats // channel: [ val(meta), [ stats ] ] - - versions = ch_versions 
// channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/joint_germline_variant_calling/main.nf b/subworkflows/nf-core/joint_germline_variant_calling/main.nf deleted file mode 100644 index 195c6bb92a..0000000000 --- a/subworkflows/nf-core/joint_germline_variant_calling/main.nf +++ /dev/null @@ -1,116 +0,0 @@ -// -// Run GATK haplotypecaller for all input samples, merge them with genomicsdbimport, perform joint genotyping with genotypeGVCFS and recalibrate with variantrecalibrator & applyvqsr. -// - -include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../../modules/nf-core/modules/gatk4/haplotypecaller/main' -include { GATK4_GENOMICSDBIMPORT as GENOMICSDBIMPORT } from '../../../modules/nf-core/modules/gatk4/genomicsdbimport/main' -include { GATK4_GENOTYPEGVCFS as GENOTYPEGVCFS } from '../../../modules/nf-core/modules/gatk4/genotypegvcfs/main' -include { GATK4_VARIANTRECALIBRATOR as VARIANTRECALIBRATOR } from '../../../modules/nf-core/modules/gatk4/variantrecalibrator/main' -include { GATK4_APPLYVQSR as APPLYVQSR } from '../../../modules/nf-core/modules/gatk4/applyvqsr/main' - -workflow GATK_JOINT_GERMLINE_VARIANT_CALLING { - take: - input // channel: [ val(meta), [ input ], [ input_index ], [] ] - run_haplotc // channel: true/false run haplotypecaller portion of workflow or skip to genomicsdbimport when false - run_vqsr // channel: true/false run vqsr portion of subworkflow - fasta // channel: /path/to/reference/fasta - fai // channel: /path/to/reference/fasta/index - dict // channel: /path/to/reference/fasta/dictionary - sites // channel: /path/to/known/sites/file - sites_index // channel: /path/to/known/sites/index - joint_id // channel: joint id to replace individual sample ids with - allelespecific // channel: true/false run allelespecific mode of vqsr modules - resources // channel: [[resource, vcfs, forvariantrecal], [resource, tbis, forvariantrecal], [resource, labels, forvariantrecal]] - annotation // channel: [annotations, to, use, for, 
variantrecal, filtering] - mode // channel: which mode to run variantrecal: SNP/INDEL/BOTH - create_rscript // channel: true/false whether to generate rscript plots in variantrecal - truthsensitivity // channel: 0-100.0 truthsensitivity cutoff for applyvqsr - - main: - ch_versions = Channel.empty() - - // haplotypecaller can be skipped if input samples are already in gvcf format, essentially making the subworkflow joint genotyping. - if (run_haplotc) { - haplotc_input = channel.from(input) - // - //Perform variant calling using haplotypecaller module. Additional argument "-ERC GVCF" used to run in gvcf mode. - // - HAPLOTYPECALLER ( haplotc_input, fasta, fai, dict, sites, sites_index ) - - ch_versions = ch_versions.mix(HAPLOTYPECALLER.out.versions.first()) - ch_vcf = HAPLOTYPECALLER.out.vcf.collect{it[1]}.toList() - ch_index = HAPLOTYPECALLER.out.tbi.collect{it[1]}.toList() - - } else { - // if haplotypecaller is skipped, this channels the input to genomicsdbimport instead of the output vcfs and tbis that normally come from haplotypecaller - direct_input = channel.from(input) - ch_vcf = direct_input.collect{it[1]}.toList() - ch_index = direct_input.collect{it[2]}.toList() - } - - // - //Convert all sample vcfs into a genomicsdb workspace using genomicsdbimport. - // - gendb_input = Channel.of([[ id:joint_id ]]).combine(ch_vcf).combine(ch_index).combine([interval_file]).combine(['']).combine([dict]) - - GENOMICSDBIMPORT ( gendb_input, false, false, false ) - - ch_versions = ch_versions.mix(GENOMICSDBIMPORT.out.versions) - - // - //Joint genotyping performed using GenotypeGVCFs - // - ch_genotype_in = GENOMICSDBIMPORT.out.genomicsdb.collect() - //[] is a placeholder for the input where the vcf tbi file would be passed in for non-genomicsdb workspace runs, which is not necessary for this workflow as it uses genomicsdb workspaces. 
- ch_genotype_in.add([]) - - GENOTYPEGVCFS ( ch_genotype_in, fasta, fai, dict, sites, sites_index ) - - ch_versions = ch_versions.mix(GENOTYPEGVCFS.out.versions) - - // setting run_vqsr to false skips the VQSR process, for if user does not wish to perform VQSR, - // or want to run the hard filtering recommended by gatk best practices for runs with a low number of samples instead. - if (run_vqsr) { - // - //Perform first step in VQSR using VariantRecalibrator - // - ch_gvcf = GENOTYPEGVCFS.out.vcf.collect() - ch_gtbi = GENOTYPEGVCFS.out.tbi.collect() - ch_vrecal_in = ch_gvcf.combine(ch_gtbi, by: 0) - - VARIANTRECALIBRATOR ( ch_vrecal_in, fasta, fai, dict, allelespecific, resources, annotation, mode, create_rscript ) - - ch_versions = ch_versions.mix(VARIANTRECALIBRATOR.out.versions) - - // - //Perform second step in VQSR using ApplyVQSR - // - ch_recal = VARIANTRECALIBRATOR.out.recal.collect() - ch_idx = VARIANTRECALIBRATOR.out.idx.collect() - ch_tranches = VARIANTRECALIBRATOR.out.tranches.collect() - ch_vqsr_in = ch_vrecal_in.combine(ch_recal, by: 0).combine(ch_idx, by: 0).combine(ch_tranches, by: 0) - - APPLYVQSR ( ch_vqsr_in, fasta, fai, dict, allelespecific, truthsensitivity, mode ) - - ch_versions = ch_versions.mix(APPLYVQSR.out.versions) - - } - - emit: - haplotc_vcf = run_haplotc ? HAPLOTYPECALLER.out.vcf.collect() : [] // channel: [ val(meta), [ vcf ] ] - haplotc_index = run_haplotc ? HAPLOTYPECALLER.out.tbi.collect() : [] // channel: [ val(meta), [ tbi ] ] - - genomicsdb = GENOMICSDBIMPORT.out.genomicsdb.collect() // channel: [ val(meta), [ genomicsdb ] ] - - genotype_vcf = GENOTYPEGVCFS.out.vcf.collect() // channel: [ val(meta), [ vcf ] ] - genotype_index = GENOTYPEGVCFS.out.vcf.collect() // channel: [ val(meta), [ tbi ] ] - - recal_file = run_vqsr ? VARIANTRECALIBRATOR.out.recal.collect() : [] // channel: [ val(meta), [ recal ] ] - recal_index = run_vqsr ? 
VARIANTRECALIBRATOR.out.idx.collect() : [] // channel: [ val(meta), [ idx ] ] - recal_tranches = run_vqsr ? VARIANTRECALIBRATOR.out.tranches.collect() : [] // channel: [ val(meta), [ tranches ] ] - - vqsr_vcf = run_vqsr ? APPLYVQSR.out.vcf.collect() : [] // channel: [ val(meta), [ vcf ] ] - vqsr_index = run_vqsr ? APPLYVQSR.out.tbi.collect() : [] // channel: [ val(meta), [ tbi ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/markduplicates.nf b/subworkflows/nf-core/markduplicates.nf deleted file mode 100644 index 2bc595d518..0000000000 --- a/subworkflows/nf-core/markduplicates.nf +++ /dev/null @@ -1,125 +0,0 @@ -// -// MARKDUPLICATES AND/OR QC after mapping -// - -include { GATK4_ESTIMATELIBRARYCOMPLEXITY } from '../../modules/nf-core/modules/gatk4/estimatelibrarycomplexity/main' -include { GATK4_MARKDUPLICATES } from '../../modules/nf-core/modules/gatk4/markduplicates/main' -include { GATK4_MARKDUPLICATES_SPARK } from '../../modules/local/gatk4/markduplicatesspark/main' -include { QUALIMAP_BAMQC } from '../../modules/local/qualimap/bamqc/main' -include { SAMTOOLS_INDEX as INDEX_MARKDUPLICATES } from '../../modules/local/samtools/index/main' -include { SAMTOOLS_STATS } from '../../modules/nf-core/modules/samtools/stats/main' -include { SAMTOOLS_VIEWINDEX as SAMTOOLS_BAM_TO_CRAM } from '../../modules/local/samtools/viewindex/main' -include { SAMTOOLS_VIEWINDEX as SAMTOOLS_BAM_TO_CRAM_SPARK } from '../../modules/local/samtools/viewindex/main' -include { DEEPTOOLS_BAMCOVERAGE } from '../../modules/local/deeptools/bamcoverage' - -workflow MARKDUPLICATES { - take: - bam_mapped // channel: [mandatory, if --skip_markdiplicate is false, else optional] meta, bam - bam_indexed // channel: [mandatory, if --skip_markduplicates is set, else optional] meta, bam, bai - use_gatk_spark // value: [mandatory] use gatk spark - save_metrics // value: [mandatory] save metrics - dict // channel: [mandatory] dict - fasta // channel: 
[mandatory] fasta - fasta_fai // channel: [mandatory] fasta_fai - skip_markduplicates // boolean: true/false - skip_bamqc // boolean: true/false - skip_samtools // boolean: true/false - skip_coverage // boolean: true/false - intervals_combined_bed_gz_tbi // channel: [optional] intervals_bed.gz, intervals_bed.gz.tbi - - main: - - ch_versions = Channel.empty() - report_markduplicates = Channel.empty() - - if (skip_markduplicates) { - bam_bai_markduplicates = bam_indexed - SAMTOOLS_BAM_TO_CRAM(bam_bai_markduplicates, fasta, fasta_fai) - cram_markduplicates = SAMTOOLS_BAM_TO_CRAM.out.cram_crai - - ch_versions = ch_versions.mix(SAMTOOLS_BAM_TO_CRAM.out.versions.first()) - } else { - if (use_gatk_spark) { - //If BAMQC should be run on MD output, then don't use MDSpark to convert to cram, but use bam output instead - if (!skip_bamqc || !skip_coverage) { - GATK4_MARKDUPLICATES_SPARK(bam_mapped, fasta, fasta_fai, dict, "bam") - INDEX_MARKDUPLICATES(GATK4_MARKDUPLICATES_SPARK.out.output) - bam_markduplicates = GATK4_MARKDUPLICATES_SPARK.out.output.join(INDEX_MARKDUPLICATES.out.bam_bai) - - SAMTOOLS_BAM_TO_CRAM_SPARK(bam_markduplicates, fasta, fasta_fai) - cram_markduplicates = SAMTOOLS_BAM_TO_CRAM_SPARK.out.cram_crai - - ch_versions = ch_versions.mix(GATK4_MARKDUPLICATES_SPARK.out.versions.first()) - ch_versions = ch_versions.mix(INDEX_MARKDUPLICATES.out.versions.first()) - ch_versions = ch_versions.mix(SAMTOOLS_BAM_TO_CRAM_SPARK.out.versions.first()) - } else { - GATK4_MARKDUPLICATES_SPARK(bam_mapped, fasta, fasta_fai, dict, "cram") - INDEX_MARKDUPLICATES(GATK4_MARKDUPLICATES_SPARK.out.output) - cram_markduplicates = GATK4_MARKDUPLICATES_SPARK.out.output.join(INDEX_MARKDUPLICATES.out.crai) - - ch_versions = ch_versions.mix(GATK4_MARKDUPLICATES_SPARK.out.versions.first()) - ch_versions = ch_versions.mix(INDEX_MARKDUPLICATES.out.versions.first()) - } - - if (save_metrics) { - GATK4_ESTIMATELIBRARYCOMPLEXITY(bam_mapped, fasta, fasta_fai, dict) - report_markduplicates = 
GATK4_ESTIMATELIBRARYCOMPLEXITY.out.metrics - - ch_versions = ch_versions.mix(GATK4_ESTIMATELIBRARYCOMPLEXITY.out.versions.first()) - } - - } else { - GATK4_MARKDUPLICATES(bam_mapped) - report_markduplicates = GATK4_MARKDUPLICATES.out.metrics - bam_markduplicates = GATK4_MARKDUPLICATES.out.bam - bai_markduplicates = GATK4_MARKDUPLICATES.out.bai - bam_bai_markduplicates = bam_markduplicates.join(bai_markduplicates) - - SAMTOOLS_BAM_TO_CRAM(bam_bai_markduplicates, fasta, fasta_fai) - cram_markduplicates = SAMTOOLS_BAM_TO_CRAM.out.cram_crai - - ch_versions = ch_versions.mix(GATK4_MARKDUPLICATES.out.versions.first()) - ch_versions = ch_versions.mix(SAMTOOLS_BAM_TO_CRAM.out.versions.first()) - } - } - - //If skip_markduplicates then QC tools are run on mapped bams, - //if !skip_markduplicates, then QC tools are run on duplicate marked crams - //After bamqc finishes, convert to cram for further analysis - samtools_stats = Channel.empty() - if (!skip_samtools) { - SAMTOOLS_STATS(cram_markduplicates, fasta) - samtools_stats = SAMTOOLS_STATS.out.stats - - ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) - } - - qualimap_bamqc = Channel.empty() - if (!skip_bamqc) { - - if(!params.wes || params.no_intervals) intervals_combined_bed_gz_tbi = [] //TODO: intervals also with WGS data? 
Probably need a parameter if WGS for deepvariant tool, that would allow to check here too - - QUALIMAP_BAMQC(bam_bai_markduplicates, intervals_combined_bed_gz_tbi) - qualimap_bamqc = QUALIMAP_BAMQC.out.results - - ch_versions = ch_versions.mix(QUALIMAP_BAMQC.out.versions.first()) - } - - deeptools_coverage = Channel.empty() - if (!skip_coverage) { - - DEEPTOOLS_BAMCOVERAGE(bam_bai_markduplicates) - deeptools_coverage = DEEPTOOLS_BAMCOVERAGE.out.bigwig - - ch_versions = ch_versions.mix(DEEPTOOLS_BAMCOVERAGE.out.versions) - } - - qc_reports = samtools_stats.mix(qualimap_bamqc) - qc_reports = report_markduplicates.mix(qc_reports) - - emit: - cram = cram_markduplicates - qc = qc_reports - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/prepare_recalibration.nf b/subworkflows/nf-core/prepare_recalibration.nf deleted file mode 100644 index 9e2e352025..0000000000 --- a/subworkflows/nf-core/prepare_recalibration.nf +++ /dev/null @@ -1,64 +0,0 @@ -// -// PREPARE RECALIBRATION -// - -include { GATK4_BASERECALIBRATOR as BASERECALIBRATOR } from '../../modules/nf-core/modules/gatk4/baserecalibrator/main' -include { GATK4_BASERECALIBRATOR_SPARK as BASERECALIBRATOR_SPARK } from '../../modules/local/gatk4/baserecalibratorspark/main' -include { GATK4_GATHERBQSRREPORTS as GATHERBQSRREPORTS } from '../../modules/nf-core/modules/gatk4/gatherbqsrreports/main' - -workflow PREPARE_RECALIBRATION { - take: - cram_markduplicates // channel: [mandatory] cram_markduplicates - use_gatk_spark // value: [mandatory] use gatk spark - dict // channel: [mandatory] dict - fasta // channel: [mandatory] fasta - fasta_fai // channel: [mandatory] fasta_fai - intervals // channel: [mandatory] intervals - num_intervals - known_sites // channel: [optional] known_sites - known_sites_tbi // channel: [optional] known_sites_tbi - no_intervals // value: [mandatory] no_intervals - - main: - - ch_versions = Channel.empty() - - cram_markduplicates.combine(intervals) - 
.map{ meta, cram, crai, intervals -> - new_meta = meta.clone() - new_meta.id = intervals.baseName != "no_intervals" ? meta.sample + "_" + intervals.baseName : meta.sample - [new_meta, cram, crai, intervals] - }.set{cram_markduplicates_intervals} - - if (use_gatk_spark) { - BASERECALIBRATOR_SPARK(cram_markduplicates_intervals, fasta, fasta_fai, dict, known_sites, known_sites_tbi) - table_baserecalibrator = BASERECALIBRATOR_SPARK.out.table - ch_versions = ch_versions.mix(BASERECALIBRATOR_SPARK.out.versions) - - } else { - BASERECALIBRATOR(cram_markduplicates_intervals, fasta, fasta_fai, dict, known_sites, known_sites_tbi) - table_baserecalibrator = BASERECALIBRATOR.out.table - ch_versions = ch_versions.mix(BASERECALIBRATOR.out.versions) - } - - //STEP 3.5: MERGING RECALIBRATION TABLES - if (no_intervals) { - table_baserecalibrator.map { meta, table -> - meta.id = meta.sample - [meta, table] - }.set{table_bqsr} - } else { - table_baserecalibrator.map{ meta, table -> - meta.id = meta.sample - [meta, table] - }.groupTuple(size: num_intervals).set{recaltable} - - GATHERBQSRREPORTS(recaltable) - table_bqsr = GATHERBQSRREPORTS.out.table - ch_versions = ch_versions.mix(GATHERBQSRREPORTS.out.versions) - } - - emit: - table_bqsr = table_bqsr - versions = ch_versions // channel: [versions.yml] -} diff --git a/subworkflows/nf-core/recalibrate.nf b/subworkflows/nf-core/recalibrate.nf deleted file mode 100644 index c4c58406c1..0000000000 --- a/subworkflows/nf-core/recalibrate.nf +++ /dev/null @@ -1,91 +0,0 @@ -// -// RECALIBRATE -// - -include { GATK4_APPLYBQSR as APPLYBQSR } from '../../modules/nf-core/modules/gatk4/applybqsr/main' -include { GATK4_APPLYBQSR_SPARK as APPLYBQSR_SPARK } from '../../modules/local/gatk4/applybqsrspark/main' -include { QUALIMAP_BAMQC_CRAM } from '../../modules/local/qualimap/bamqccram/main' -include { SAMTOOLS_INDEX as INDEX_RECALIBRATE } from '../../modules/local/samtools/index/main' -include { SAMTOOLS_MERGE_CRAM } from 
'../../modules/local/samtools/mergecram/main' -include { SAMTOOLS_STATS } from '../../modules/nf-core/modules/samtools/stats/main' - -workflow RECALIBRATE { - take: - use_gatk_spark // value: [mandatory] use gatk spark - skip_bamqc // boolean: true/false - skip_samtools // boolean: true/false - cram // channel: [mandatory] cram - dict // channel: [mandatory] dict - fasta // channel: [mandatory] fasta - fasta_fai // channel: [mandatory] fasta_fai - intervals // channel: [mandatory] intervals - num_intervals - no_intervals - intervals_combined_bed_gz_tbi - - main: - - ch_versions = Channel.empty() - cram_recalibrated_index = Channel.empty() - cram_recalibrated = Channel.empty() - cram_reports = Channel.empty() - - cram.combine(intervals).map{ meta, cram, crai, recal, intervals -> - new_meta = meta.clone() - new_meta.id = intervals.baseName != "no_intervals" ? meta.sample + "_" + intervals.baseName : meta.sample - [new_meta, cram, crai, recal, intervals] - }.set{cram_intervals} - - if(use_gatk_spark){ - APPLYBQSR_SPARK(cram_intervals, fasta, fasta_fai, dict) - cram_applybqsr = APPLYBQSR_SPARK.out.cram - ch_versions = ch_versions.mix(APPLYBQSR_SPARK.out.versions) - }else{ - APPLYBQSR(cram_intervals, fasta, fasta_fai, dict) - cram_applybqsr = APPLYBQSR.out.cram - ch_versions = ch_versions.mix(APPLYBQSR.out.versions) - } - - // STEP 4.5: MERGING AND INDEXING THE RECALIBRATED BAM FILES - if (params.no_intervals) { - cram_recalibrated = cram_applybqsr - } else { - cram_applybqsr.map{ meta, cram -> - meta.id = meta.sample - [meta, cram] - }.groupTuple(size: num_intervals).set{cram_recalibrated_interval} - - SAMTOOLS_MERGE_CRAM(cram_recalibrated_interval, fasta) - cram_recalibrated = SAMTOOLS_MERGE_CRAM.out.cram - ch_versions = ch_versions.mix(SAMTOOLS_MERGE_CRAM.out.versions) - } - - INDEX_RECALIBRATE(cram_recalibrated) - cram_recalibrated_index = INDEX_RECALIBRATE.out.cram_crai - ch_versions = ch_versions.mix(INDEX_RECALIBRATE.out.versions) - - qualimap_bamqc = 
Channel.empty() - samtools_stats = Channel.empty() - - if (!skip_bamqc) { - - if(!params.wes || params.no_intervals) intervals_combined_bed_gz_tbi = [] //TODO: intervals also with WGS data? Probably need a parameter if WGS for deepvariant tool, that would allow to check here too - - QUALIMAP_BAMQC_CRAM(cram_recalibrated_index, intervals_combined_bed_gz_tbi, fasta, fasta_fai) - qualimap_bamqc = QUALIMAP_BAMQC_CRAM.out.results - ch_versions = ch_versions.mix(QUALIMAP_BAMQC_CRAM.out.versions) - } - - if (!skip_samtools) { - SAMTOOLS_STATS(cram_recalibrated_index, fasta) - samtools_stats = SAMTOOLS_STATS.out.stats - ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) - } - cram_reports = samtools_stats.mix(qualimap_bamqc) - - - emit: - cram = cram_recalibrated_index - qc = cram_reports - versions = ch_versions -} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 0000000000..d6e593e852 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info("${workflow.manifest.name} ${getWorkflowVersion()}") + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn 
if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = groovy.json.JsonOutput.toJson(params) + temp_pf.text = groovy.json.JsonOutput.prettyPrint(jsonStr) + + nextflow.extension.FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + def parser = new org.yaml.snakeyaml.Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } + catch (NullPointerException e) { + log.debug(e) + log.warn("Could not verify conda channel configuration.") + return null + } + catch (IOException e) { + log.debug(e) + log.warn("Could not verify conda channel configuration.") + return null + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. 
+ def required_channels_in_order = ['conda-forge', 'bioconda'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = required_channels_in_order != channels.findAll { ch -> ch in required_channels_in_order } + + if (channels_missing | channel_priority_violation) { + log.warn """\ + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + There is a problem with your Conda configuration! + You will need to set-up the conda-forge and bioconda channels correctly. + Please refer to https://bioconda.github.io/ + The observed channel order is + ${channels} + but the following channel order is required: + ${required_channels_in_order} + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + """.stripIndent(true) + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 0000000000..e5c3a0a828 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. 
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 0000000000..bf568a08b9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,355 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NFCORE_PIPELINE { + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + def valid_config = true as Boolean + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn( + "[${workflow.manifest.name}] You are attempting to run the pipeline without any custom configuration!\n\n" + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + "Please refer to the quick start section and usage docs for the pipeline.\n " + ) + valid_config = false + } + return valid_config +} + +// +// Raise an error if `-profile` ends with a trailing comma and warn if a positional CLI argument is detected +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error( + "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } + if (nextflow_cli_args[0]) { + log.warn( + "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + def yaml = new org.yaml.snakeyaml.Yaml() + def versions = yaml.load(yaml_file).collectEntries { k, v -> [k.tokenize(':')[-1], v] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + ${workflow.manifest.name}: ${getWorkflowVersion()} + Nextflow: ${workflow.nextflow.version} + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + summary_params + .keySet() + .each { group -> + def group_params = summary_params.get(group) + // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    ${group}

    \n" + summary_section += "
    \n" + group_params + .keySet() + .sort() + .each { param -> + summary_section += "
    ${param}
    ${group_params.get(param) ?: 'N/A'}
    \n" + } + summary_section += "
    \n" + } + } + + def yaml_file_text = "id: '${workflow.manifest.name.replace('/', '-')}-summary'\n" as String + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + def colorcodes = [:] as Map + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? 
'' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? 
'' : "\033[1;97m" + + return colorcodes +} + +// Return a single report from an object that may be a Path or List +// +def getSingleReport(multiqc_reports) { + if (multiqc_reports instanceof Path) { + return multiqc_reports + } else if (multiqc_reports instanceof List) { + if (multiqc_reports.size() == 0) { + log.warn("[${workflow.manifest.name}] No reports found from process 'MULTIQC'") + return null + } else if (multiqc_reports.size() == 1) { + return multiqc_reports.first() + } else { + log.warn("[${workflow.manifest.name}] Found multiple reports from process 'MULTIQC', will use only one") + return multiqc_reports.first() + } + } else { + return null + } +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[${workflow.manifest.name}] Successful: ${workflow.runName}" + if (!workflow.success) { + subject = "[${workflow.manifest.name}] FAILED: ${workflow.runName}" + } + + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) { + misc_fields['Pipeline repository Git URL'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['Pipeline repository Git Commit'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['Pipeline Git branch/tag'] = workflow.revision + } + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + 
email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = getSingleReport(multiqc_report) + + // Check if we are only sending emails on failure + def email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as MemoryUnit + def smail_fields = [email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + def colors = logColours(monochrome_logs) as Map + if (email_address) { + try { + if (plaintext_email) { + // Throw (not merely construct) the exception so control jumps to the catch block below, which sends via `mail` instead of `sendmail` + throw new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') + } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + ['sendmail', '-t'].execute() << sendmail_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (sendmail)-") + } + catch (Exception msg) { + log.debug(msg.toString()) + log.debug("Trying with mail instead of sendmail") + // Catch failures and try with plaintext + def mail_cmd = ['mail', '-s', subject, '--content-type=text/html', email_address] + mail_cmd.execute() << email_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (mail)-") + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + nextflow.extension.FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html") + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + nextflow.extension.FilesEx.copyTo(output_tf.toPath(), 
"${outdir}/pipeline_info/pipeline_report.txt") + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Pipeline completed successfully${colors.reset}-") + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-") + } + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.red} Pipeline completed with errors${colors.reset}-") + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 0000000000..d08d24342d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf new file mode 100644 index 0000000000..1df8b76fba --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/main.nf @@ -0,0 +1,73 @@ +// +// Subworkflow that uses the nf-schema plugin to validate parameters and render the parameter summary +// + +include { paramsSummaryLog } from 'plugin/nf-schema' +include { 
validateParameters } from 'plugin/nf-schema' +include { paramsHelp } from 'plugin/nf-schema' + +workflow UTILS_NFSCHEMA_PLUGIN { + + take: + input_workflow // workflow: the workflow object used by nf-schema to get metadata from the workflow + validate_params // boolean: validate the parameters + parameters_schema // string: path to the parameters JSON schema. + // this has to be the same as the schema given to `validation.parametersSchema` + // when this input is empty it will automatically use the configured schema or + // "${projectDir}/nextflow_schema.json" as default. This input should not be empty + // for meta pipelines + help // boolean: show help message + help_full // boolean: show full help message + show_hidden // boolean: show hidden parameters in help message + before_text // string: text to show before the help message and parameters summary + after_text // string: text to show after the help message and parameters summary + command // string: an example command of the pipeline + + main: + + if(help || help_full) { + help_options = [ + beforeText: before_text, + afterText: after_text, + command: command, + showHidden: show_hidden, + fullHelp: help_full, + ] + if(parameters_schema) { + help_options << [parametersSchema: parameters_schema] + } + log.info paramsHelp( + help_options, + (params.help instanceof String && params.help != "true") ? params.help : "", + ) + exit 0 + } + + // + // Print parameter summary to stdout. 
This will display the parameters + // that differ from the default given in the JSON schema + // + + summary_options = [:] + if(parameters_schema) { + summary_options << [parametersSchema: parameters_schema] + } + log.info before_text + log.info paramsSummaryLog(summary_options, input_workflow) + log.info after_text + + // + // Validate the parameters using nextflow_schema.json or the schema + // given via the validation.parametersSchema configuration option + // + if(validate_params) { + validateOptions = [:] + if(parameters_schema) { + validateOptions << [parametersSchema: parameters_schema] + } + validateParameters(validateOptions) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfschema_plugin/meta.yml b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml new file mode 100644 index 0000000000..f7d9f02885 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml @@ -0,0 +1,35 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "utils_nfschema_plugin" +description: Run nf-schema to validate parameters and create a summary of changed parameters +keywords: + - validation + - JSON schema + - plugin + - parameters + - summary +components: [] +input: + - input_workflow: + type: object + description: | + The workflow object of the used pipeline. + This object contains meta data used to create the params summary log + - validate_params: + type: boolean + description: Validate the parameters and error if invalid. + - parameters_schema: + type: string + description: | + Path to the parameters JSON schema. + This has to be the same as the schema given to the `validation.parametersSchema` config + option. When this input is empty it will automatically use the configured schema or + "${projectDir}/nextflow_schema.json" as default. The schema should not be given in this way + for meta pipelines. 
+output: + - dummy_emit: + type: boolean + description: Dummy emit to make nf-core subworkflows lint happy +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/subworkflows/nf-core/vcf_annotate_snpeff/main.nf b/subworkflows/nf-core/vcf_annotate_snpeff/main.nf new file mode 100644 index 0000000000..3eccc896f8 --- /dev/null +++ b/subworkflows/nf-core/vcf_annotate_snpeff/main.nf @@ -0,0 +1,24 @@ +// +// Run SNPEFF to annotate VCF files +// + +include { SNPEFF_SNPEFF } from '../../../modules/nf-core/snpeff/snpeff' +include { TABIX_BGZIPTABIX } from '../../../modules/nf-core/tabix/bgziptabix' + +workflow VCF_ANNOTATE_SNPEFF { + take: + ch_vcf // channel: [ val(meta), path(vcf) ] + val_snpeff_db // string: db version to use + ch_snpeff_cache // channel: [ path(cache) ] (optional) + + main: + SNPEFF_SNPEFF(ch_vcf, val_snpeff_db, ch_snpeff_cache) + TABIX_BGZIPTABIX(SNPEFF_SNPEFF.out.vcf) + + + emit: + vcf_tbi = TABIX_BGZIPTABIX.out.gz_index // channel: [ val(meta), path(vcf), path(tbi) ] + reports = SNPEFF_SNPEFF.out.report // channel: [ path(html) ] + summary = SNPEFF_SNPEFF.out.summary_html // channel: [ path(html) ] + genes_txt = SNPEFF_SNPEFF.out.genes_txt // channel: [ path(genes.txt) ] +} diff --git a/subworkflows/nf-core/vcf_annotate_snpeff/meta.yml b/subworkflows/nf-core/vcf_annotate_snpeff/meta.yml new file mode 100644 index 0000000000..bf2db6523b --- /dev/null +++ b/subworkflows/nf-core/vcf_annotate_snpeff/meta.yml @@ -0,0 +1,48 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: vcf_annotate_snpeff +description: Perform annotation with snpEff and bgzip + tabix index the resulting VCF file +keywords: + - vcf + - annotation + - snpeff +components: + - snpeff + - snpeff/snpeff + - tabix/bgziptabix +input: + - ch_vcf: + description: | + vcf file + Structure: [ val(meta), path(vcf) ] + - val_snpeff_db: + type: string + description: db version to use + - ch_snpeff_cache: + 
description: | + path to root cache folder for snpEff (optional) + Structure: [ path(cache) ] +output: + - vcf_tbi: + description: | + Compressed vcf file + tabix index + Structure: [ val(meta), path(vcf), path(tbi) ] + - reports: + description: | + snpEff csv report + Structure: [ path(csv) ] + - summary: + description: | + snpEff html summary + Structure: [ path(html) ] + - genes_txt: + description: | + snpEff genes txt file + Structure: [ path(txt) ] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/tests/.nftignore b/tests/.nftignore new file mode 100644 index 0000000000..7ccdddf1dc --- /dev/null +++ b/tests/.nftignore @@ -0,0 +1,56 @@ +.DS_Store +annotation/**/*.vcf.{gz,gz.tbi} +csv/*.csv +multiqc/multiqc_data/bcftools*.txt +multiqc/multiqc_data/bbmap*.txt +multiqc/multiqc_data/bbsplit_stats_table.txt +multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt +multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt +multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt +multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt +multiqc/multiqc_data/gatk_base_recalibrator.txt +multiqc/multiqc_data/llms-full.txt +multiqc/multiqc_data/mosdepth_cov_dist.txt +multiqc/multiqc_data/mosdepth_cumcov_dist.txt +multiqc/multiqc_data/multiqc.log +multiqc/multiqc_data/multiqc.parquet +multiqc/multiqc_data/multiqc_bcftools_stats.txt +multiqc/multiqc_data/multiqc_data.json +multiqc/multiqc_data/multiqc_general_stats.txt +multiqc/multiqc_data/multiqc_picard_dups.txt +multiqc/multiqc_data/multiqc_samtools_stats.txt +multiqc/multiqc_data/multiqc_software_versions.txt +multiqc/multiqc_data/multiqc_sources.txt +multiqc/multiqc_data/picard_deduplication.txt +multiqc/multiqc_data/vcftools*.txt +multiqc/multiqc_plots/{svg,pdf,png}/*.{svg,pdf,png} +multiqc/multiqc_report.html
+no_intervals.{bed,bed.gz,bed.gz.tbi} +pipeline_info/*.{html,json,txt,yml} +preprocessing/**/*.{bam,bam.bai,cram,cram.crai,table} +preprocessing/bbsplit/*/*.fastq.gz +preprocessing/bbsplit/*/*.stats.txt +reference/dragmap/hash_table.{cfg,cfg.bin} +reference/dragmap/hash_table_stats.txt +reports/**/*.cram.metrics{,.multiqc.tsv} +reports/EnsemblVEP/*/*.ann.summary.html +reports/fastp/**/*.fastp.{log,html} +reports/fastqc/**/*_fastqc.{html,zip} +reports/ngscheckmate/ngscheckmate.pdf +reports/ngscheckmate/vcfs/*.ngscheckmate.vcf.gz +reports/samtools/**/**.cram.stats +reports/snpeff/*/*_snpEff.csv +reports/snpeff/*/snpEff_summary.html +reports/vcftools/**/*.TsTv.qual +variant_calling/**/*.{bcf,vcf,vcf.gz,vcf.gz.tbi} +variant_calling/**/*.{pdf,png} +variant_calling/ascat/*/*.metrics.txt +variant_calling/consensus/*/*/README.txt +variant_calling/consensus/*/*/0000.vcf.gz* +variant_calling/consensus/*/*/sites.txt +variant_calling/controlfreec/*/config.txt +variant_calling/msisensor2/*/*_somatic +variant_calling/muse/*/*.MuSE.txt +variant_calling/mutect2/*/*.mutect2.artifactprior.tar.gz +variant_calling/tiddit/*/*.tiddit.ploidies.tab +variant_calling/varlociraptor/*/*.alignment-properties.json diff --git a/tests/aligner-bwa-mem.nf.test b/tests/aligner-bwa-mem.nf.test new file mode 100644 index 0000000000..cf9886d053 --- /dev/null +++ b/tests/aligner-bwa-mem.nf.test @@ -0,0 +1,55 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --aligner bwa-mem --save_reference skip QC/recal/md", + params: [ + aligner: 'bwa-mem', + save_reference: true, + skip_tools: 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools', + tools: '', + ], + ], + [ + name: "-profile test --aligner bwa-mem --save_reference --build_only_index", + params: [ + aligner: 'bwa-mem', + build_only_index: true, + input: false, + save_reference: true, + skip_tools: 'multiqc', + tools: '', + ], + ], + [ + name: "-profile test 
--aligner bwa-mem --save_reference skip QC/recal/md -stub", + params: [ + aligner: 'bwa-mem', + save_reference: true, + skip_tools: 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools', + tools: '', + ], + stub: true, + ], + [ + name: "-profile test --aligner bwa-mem --save_reference --build_only_index -stub", + params: [ + aligner: 'bwa-mem', + build_only_index: true, + input: false, + save_reference: true, + skip_tools: 'multiqc', + tools: '', + ], + stub: true, + ], + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/aligner-bwa-mem.nf.test.snap b/tests/aligner-bwa-mem.nf.test.snap new file mode 100644 index 0000000000..10fe5f3c85 --- /dev/null +++ b/tests/aligner-bwa-mem.nf.test.snap @@ -0,0 +1,265 @@ +{ + "-profile test --aligner bwa-mem --save_reference skip QC/recal/md": { + "content": [ + 10, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_BAM": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "reference", + "reference/bwa", + "reference/bwa/genome.amb", + "reference/bwa/genome.ann", + "reference/bwa/genome.bwt", + "reference/bwa/genome.pac", + "reference/bwa/genome.sa", + "reference/intervals", + 
"reference/intervals/chr22_1-40001.bed", + "reference/intervals/chr22_1-40001.bed.gz", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz" + ], + [ + "genome.amb:md5,1891c1de381b3a96d4e72f590fde20c1", + "genome.ann:md5,2df4aa2d7580639fa0fcdbcad5e2e969", + "genome.bwt:md5,815eded87e4cb6b0f1daab5c4d6e30af", + "genome.pac:md5,8569fbdb2c98c6fb16dfa73d8eacb070", + "genome.sa:md5,e7cff62b919448a3a3d0fe4aaf427594", + "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e" + ], + "No BAM files", + [ + "test.sorted.cram:md5,5534c350547fd253f0f2b9450362bed" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T18:55:05.917351813" + }, + "-profile test --aligner bwa-mem --save_reference --build_only_index -stub": { + "content": [ + 5, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/bwa", + "reference/bwa/genome.amb", + "reference/bwa/genome.ann", + "reference/bwa/genome.bwt", + "reference/bwa/genome.pac", + "reference/bwa/genome.sa", + "reference/intervals", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz", + "reference/intervals/genome.stub.bed", + "reference/intervals/genome.stub.bed.gz" + ], + [ + "WARN: nf-core pipelines 
do not accept positional arguments. The positional argument `true` has been detected." + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T18:57:28.07820518" + }, + "-profile test --aligner bwa-mem --save_reference --build_only_index": { + "content": [ + 5, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/bwa", + "reference/bwa/genome.amb", + "reference/bwa/genome.ann", + "reference/bwa/genome.bwt", + "reference/bwa/genome.pac", + "reference/bwa/genome.sa", + "reference/intervals", + "reference/intervals/chr22_1-40001.bed", + "reference/intervals/chr22_1-40001.bed.gz", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz" + ], + [ + "genome.amb:md5,1891c1de381b3a96d4e72f590fde20c1", + "genome.ann:md5,2df4aa2d7580639fa0fcdbcad5e2e969", + "genome.bwt:md5,815eded87e4cb6b0f1daab5c4d6e30af", + "genome.pac:md5,8569fbdb2c98c6fb16dfa73d8eacb070", + "genome.sa:md5,e7cff62b919448a3a3d0fe4aaf427594", + "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e" + ], + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.3" + }, + "timestamp": "2025-06-12T10:31:18.476995966" + }, + "-profile test --aligner bwa-mem --save_reference skip QC/recal/md -stub": { + "content": [ + 10, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": 
"0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_BAM": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "reference", + "reference/bwa", + "reference/bwa/genome.amb", + "reference/bwa/genome.ann", + "reference/bwa/genome.bwt", + "reference/bwa/genome.pac", + "reference/bwa/genome.sa", + "reference/intervals", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz", + "reference/intervals/genome.stub.bed", + "reference/intervals/genome.stub.bed.gz" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1", + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." 
+ ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T18:56:44.455023922" + } +} diff --git a/tests/aligner-bwa-mem2.nf.test b/tests/aligner-bwa-mem2.nf.test new file mode 100644 index 0000000000..e6cdb189aa --- /dev/null +++ b/tests/aligner-bwa-mem2.nf.test @@ -0,0 +1,55 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --aligner bwa-mem2 --save_reference skip QC/recal/md", + params: [ + aligner: 'bwa-mem2', + save_reference: true, + skip_tools: 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools', + tools: '', + ], + ], + [ + name: "-profile test --aligner bwa-mem2 --save_reference --build_only_index", + params: [ + aligner: 'bwa-mem2', + build_only_index: true, + input: false, + save_reference: true, + skip_tools: 'multiqc', + tools: '', + ], + ], + [ + name: "-profile test --aligner bwa-mem2 --save_reference skip QC/recal/md - stub", + params: [ + aligner: 'bwa-mem2', + save_reference: true, + skip_tools: 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools', + tools: '', + ], + stub: true, + ], + [ + name: "-profile test --aligner bwa-mem2 --save_reference --build_only_index - stub", + params: [ + aligner: 'bwa-mem2', + build_only_index: true, + input: false, + save_reference: true, + skip_tools: 'multiqc', + tools: '', + ], + stub: true, + ], + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/aligner-bwa-mem2.nf.test.snap b/tests/aligner-bwa-mem2.nf.test.snap new file mode 100644 index 0000000000..060bc8e9e7 --- /dev/null +++ b/tests/aligner-bwa-mem2.nf.test.snap @@ -0,0 +1,265 @@ +{ + "-profile test --aligner bwa-mem2 --save_reference --build_only_index": { + "content": [ + 5, + { + "BWAMEM2_INDEX": { + "bwamem2": "2.2.1" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + 
"gatk4": "4.6.1.0" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/bwamem2", + "reference/bwamem2/genome.fasta.0123", + "reference/bwamem2/genome.fasta.amb", + "reference/bwamem2/genome.fasta.ann", + "reference/bwamem2/genome.fasta.bwt.2bit.64", + "reference/bwamem2/genome.fasta.pac", + "reference/intervals", + "reference/intervals/chr22_1-40001.bed", + "reference/intervals/chr22_1-40001.bed.gz", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz" + ], + [ + "genome.fasta.0123:md5,d73300d44f733bcdb7c988fc3ff3e3e9", + "genome.fasta.amb:md5,1891c1de381b3a96d4e72f590fde20c1", + "genome.fasta.ann:md5,2df4aa2d7580639fa0fcdbcad5e2e969", + "genome.fasta.bwt.2bit.64:md5,cd4bdf496eab05228a50c45ee43c1ed0", + "genome.fasta.pac:md5,8569fbdb2c98c6fb16dfa73d8eacb070", + "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e" + ], + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings" + ], + "timestamp": "2025-06-12T10:35:55.383074123", + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.3" + } + }, + "-profile test --aligner bwa-mem2 --save_reference skip QC/recal/md": { + "content": [ + 10, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "BWAMEM2_INDEX": { + "bwamem2": "2.2.1" + }, + "BWAMEM2_MEM": { + "bwamem2": "2.2.1", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_BAM": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + 
}, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "reference", + "reference/bwamem2", + "reference/bwamem2/genome.fasta.0123", + "reference/bwamem2/genome.fasta.amb", + "reference/bwamem2/genome.fasta.ann", + "reference/bwamem2/genome.fasta.bwt.2bit.64", + "reference/bwamem2/genome.fasta.pac", + "reference/intervals", + "reference/intervals/chr22_1-40001.bed", + "reference/intervals/chr22_1-40001.bed.gz", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz" + ], + [ + "genome.fasta.0123:md5,d73300d44f733bcdb7c988fc3ff3e3e9", + "genome.fasta.amb:md5,1891c1de381b3a96d4e72f590fde20c1", + "genome.fasta.ann:md5,2df4aa2d7580639fa0fcdbcad5e2e969", + "genome.fasta.bwt.2bit.64:md5,cd4bdf496eab05228a50c45ee43c1ed0", + "genome.fasta.pac:md5,8569fbdb2c98c6fb16dfa73d8eacb070", + "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e" + ], + "No BAM files", + [ + "test.sorted.cram:md5,5534c350547fd253f0f2b9450362bed" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "timestamp": "2025-12-15T18:58:41.317252772", + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + } + }, + "-profile test --aligner bwa-mem2 --save_reference --build_only_index - stub": { + "content": [ + 5, + { + "BWAMEM2_INDEX": { + "bwamem2": "2.2.1" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + 
"gatk4": "4.6.1.0" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/bwamem2", + "reference/bwamem2/genome.fasta.0123", + "reference/bwamem2/genome.fasta.amb", + "reference/bwamem2/genome.fasta.ann", + "reference/bwamem2/genome.fasta.bwt.2bit.64", + "reference/bwamem2/genome.fasta.pac", + "reference/intervals", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz", + "reference/intervals/genome.stub.bed", + "reference/intervals/genome.stub.bed.gz" + ], + [ + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." + ] + ], + "timestamp": "2026-04-08T12:02:19.626170099", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.2" + } + }, + "-profile test --aligner bwa-mem2 --save_reference skip QC/recal/md - stub": { + "content": [ + 10, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "BWAMEM2_INDEX": { + "bwamem2": "2.2.1" + }, + "BWAMEM2_MEM": { + "bwamem2": "2.2.1", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_BAM": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "reference", + "reference/bwamem2", + "reference/bwamem2/genome.fasta.0123", + 
"reference/bwamem2/genome.fasta.amb", + "reference/bwamem2/genome.fasta.ann", + "reference/bwamem2/genome.fasta.bwt.2bit.64", + "reference/bwamem2/genome.fasta.pac", + "reference/intervals", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz", + "reference/intervals/genome.stub.bed", + "reference/intervals/genome.stub.bed.gz" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1", + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." + ] + ], + "timestamp": "2026-04-08T12:01:20.65440497", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.2" + } + } +} \ No newline at end of file diff --git a/tests/aligner-dragmap.nf.test b/tests/aligner-dragmap.nf.test new file mode 100644 index 0000000000..5fa39a07f8 --- /dev/null +++ b/tests/aligner-dragmap.nf.test @@ -0,0 +1,55 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --aligner dragmap --save_reference skip QC/recal/md", + params: [ + aligner: 'dragmap', + save_reference: true, + skip_tools: 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools', + tools: '', + ], + ], + [ + name: "-profile test --aligner dragmap --save_reference --build_only_index", + params: [ + aligner: 'dragmap', + build_only_index: true, + input: false, + save_reference: true, + skip_tools: 'multiqc', + tools: '', + ], + ], + [ + name: "-profile test --aligner dragmap --save_reference skip QC/recal/md - stub", + params: [ + aligner: 'dragmap', + save_reference: true, + skip_tools: 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools', + tools: '', + ], + stub: true + ], + [ + name: "-profile test --aligner dragmap --save_reference --build_only_index - stub", + params: [ + aligner: 'dragmap', + build_only_index: true, + input: false, + 
save_reference: true, + skip_tools: 'multiqc', + tools: '', + ], + stub: true + ], + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/aligner-dragmap.nf.test.snap b/tests/aligner-dragmap.nf.test.snap new file mode 100644 index 0000000000..a73ab15d10 --- /dev/null +++ b/tests/aligner-dragmap.nf.test.snap @@ -0,0 +1,266 @@ +{ + "-profile test --aligner dragmap --save_reference --build_only_index": { + "content": [ + 5, + { + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "DRAGMAP_HASHTABLE": { + "dragmap": "1.2.1" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dragmap", + "reference/dragmap/hash_table.cfg", + "reference/dragmap/hash_table.cfg.bin", + "reference/dragmap/hash_table.cmp", + "reference/dragmap/hash_table_stats.txt", + "reference/dragmap/ref_index.bin", + "reference/dragmap/reference.bin", + "reference/dragmap/repeat_mask.bin", + "reference/dragmap/str_table.bin", + "reference/intervals", + "reference/intervals/chr22_1-40001.bed", + "reference/intervals/chr22_1-40001.bed.gz", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz" + ], + [ + "hash_table.cmp:md5,1caab4ffc89f81ace615a2e813295cf4", + "ref_index.bin:md5,dbb5c7d26b974e0ac338024fe4535044", + "reference.bin:md5,be67b80ee48aa96b383fd72f1ccfefea", + "repeat_mask.bin:md5,294939f1f80aa7f4a70b9b537e4c0f21", + "str_table.bin:md5,45f7818c4a10fdeed04db7a34b5f9ff1", + "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + 
"genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e" + ], + "No BAM files", + "No CRAM files", + "No VCF files", + [ + "WARN: DragMap was specified as aligner. Base recalibration is not contained in --skip_tools. It is recommended to skip baserecalibration when using DragMap" + ] + ], + "timestamp": "2025-12-15T19:02:48.431048619", + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + } + }, + "-profile test --aligner dragmap --save_reference skip QC/recal/md": { + "content": [ + 10, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "DRAGMAP_ALIGN": { + "dragmap": "1.2.1", + "pigz": "2.3.4", + "samtools": "1.19.2" + }, + "DRAGMAP_HASHTABLE": { + "dragmap": "1.2.1" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_BAM": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "reference", + "reference/dragmap", + "reference/dragmap/hash_table.cfg", + "reference/dragmap/hash_table.cfg.bin", + "reference/dragmap/hash_table.cmp", + "reference/dragmap/hash_table_stats.txt", + "reference/dragmap/ref_index.bin", + "reference/dragmap/reference.bin", + "reference/dragmap/repeat_mask.bin", + "reference/dragmap/str_table.bin", + "reference/intervals", + "reference/intervals/chr22_1-40001.bed", + "reference/intervals/chr22_1-40001.bed.gz", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz" + ], + [ + "hash_table.cmp:md5,1caab4ffc89f81ace615a2e813295cf4", + 
"ref_index.bin:md5,dbb5c7d26b974e0ac338024fe4535044", + "reference.bin:md5,be67b80ee48aa96b383fd72f1ccfefea", + "repeat_mask.bin:md5,294939f1f80aa7f4a70b9b537e4c0f21", + "str_table.bin:md5,45f7818c4a10fdeed04db7a34b5f9ff1", + "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e" + ], + "No BAM files", + [ + "test.sorted.cram:md5,7088dc71e5390aec0dd9d778f4568297" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "timestamp": "2025-12-15T19:01:59.187593488", + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + } + }, + "-profile test --aligner dragmap --save_reference --build_only_index - stub": { + "content": [ + 5, + { + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "DRAGMAP_HASHTABLE": { + "dragmap": "1.2.1" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dragmap", + "reference/intervals", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz", + "reference/intervals/genome.stub.bed", + "reference/intervals/genome.stub.bed.gz" + ], + [ + "WARN: DragMap was specified as aligner. Base recalibration is not contained in --skip_tools. It is recommended to skip baserecalibration when using DragMap", + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." 
+ ] + ], + "timestamp": "2026-04-08T12:04:22.190861311", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.2" + } + }, + "-profile test --aligner dragmap --save_reference skip QC/recal/md - stub": { + "content": [ + 10, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "DRAGMAP_ALIGN": { + "dragmap": "1.2.1", + "pigz": "2.3.4", + "samtools": "1.19.2" + }, + "DRAGMAP_HASHTABLE": { + "dragmap": "1.2.1" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_BAM": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "reference", + "reference/dragmap", + "reference/intervals", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz", + "reference/intervals/genome.stub.bed", + "reference/intervals/genome.stub.bed.gz" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1", + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." 
+ ] + ], + "timestamp": "2026-04-08T12:03:21.664791416", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.2" + } + } +} \ No newline at end of file diff --git a/tests/aligner-parabricks.nf.test b/tests/aligner-parabricks.nf.test new file mode 100644 index 0000000000..8d8513429c --- /dev/null +++ b/tests/aligner-parabricks.nf.test @@ -0,0 +1,67 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + profile "test,gpu" + + def test_scenario = [ + [ + name: "-profile test --aligner parabricks --wes", + params: [ + aligner: 'parabricks', + save_reference: true, + save_mapped: true, + tools: '', + wes : true, + ], + gpu: true, + no_conda: true + ], + [ + name: "-profile test --aligner parabricks --wes --save_output_as_bam", + params: [ + aligner: 'parabricks', + save_reference: true, + save_output_as_bam: true, + tools: '', + wes : true, + ], + gpu: true, + no_conda: true + ], + [ + name: "-profile test --aligner parabricks --tools mutect2 --wes --input fastq_pair.csv", + params: [ + input : "${projectDir}/tests/csv/3.0/fastq_pair.csv", + aligner: 'parabricks', + save_reference: true, + save_mapped: true, + tools: 'mutect2', + wes : true, + ], + gpu: true, + no_conda: true + ], + [ + name: "-profile test --aligner parabricks --wes --intervals", + params: [ + aligner: 'parabricks', + save_reference: true, + save_mapped: true, + tools: '', + wes : true, + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', + ], + gpu: true, + no_conda: true + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/aligner-parabricks.nf.test.snap b/tests/aligner-parabricks.nf.test.snap new file mode 100644 index 0000000000..080939fc56 --- /dev/null +++ 
b/tests/aligner-parabricks.nf.test.snap @@ -0,0 +1,995 @@ +{ + "-profile test --aligner parabricks --wes --save_output_as_bam": { + "content": [ + 15, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "CRAM_TO_BAM": { + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MERGE_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "PARABRICKS_FQ2BAM": { + "parabricks": "4.6.0-1" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + 
"multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + 
"multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + 
"multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/parabricks", + "preprocessing/parabricks/test", + "preprocessing/parabricks/test-test_L1", + "preprocessing/parabricks/test-test_L2", + "preprocessing/parabricks/test/test.bam", + "preprocessing/parabricks/test/test.bam.bai", + "reference", + "reference/bwa", + "reference/bwa/genome.fasta.amb", + "reference/bwa/genome.fasta.ann", + "reference/bwa/genome.fasta.bwt", + "reference/bwa/genome.fasta.pac", + "reference/bwa/genome.fasta.sa", + "reference/intervals", + "reference/intervals/chr22_1-40001.bed", + "reference/intervals/chr22_1-40001.bed.gz", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz", + "reports", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test-test_L2", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.zip", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.zip", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + 
"reports/samtools/test/test.recal.cram.stats" + ], + [ + "fastqc-status-check-heatmap.txt:md5,a020b9689ddeb4abec16b4854fe452f1", + "fastqc_adapter_content_plot.txt:md5,2e1b72be741319e7fadbbb39d7e5b37d", + "fastqc_per_base_n_content_plot.txt:md5,ad3b971a6bb4e8ba6c844c8a03584eb8", + "fastqc_per_base_sequence_quality_plot.txt:md5,1bc03889d243a944253ac637d81ae10c", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,2c42d140ce06c08dad2b58f397c23239", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,59e22821d350bfb97c37ffd9088f5ad9", + "fastqc_per_sequence_quality_scores_plot.txt:md5,f33615cc98bb6225f39545a415fa7c0f", + "fastqc_sequence_counts_plot.txt:md5,7f0f19a58e8e54e792a751fd04a9ae13", + "fastqc_sequence_duplication_levels_plot.txt:md5,92b02e250ff78725deb9a10d510fcecc", + "fastqc_sequence_length_distribution_plot.txt:md5,fb04dce68ec566314125bc9438211b28", + "mosdepth-coverage-per-contig-single.txt:md5,f3a4ba86603c5121a26f54616a39c1ba", + "mosdepth-cumcoverage-dist-id.txt:md5,171f3912fd2b6c5e9c9602e5527bdff7", + "mosdepth_perchrom.txt:md5,f3a4ba86603c5121a26f54616a39c1ba", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "multiqc_fastqc.txt:md5,bde0d0bffa62228b33fb68b7e25b6ff8", + "samtools-stats-dp.txt:md5,068d671d8e3fa0a12b9c36072fb36808", + "samtools_alignment_plot.txt:md5,d2729501a111a33268c008ef6f18c7cd", + "genome.fasta.amb:md5,1891c1de381b3a96d4e72f590fde20c1", + "genome.fasta.ann:md5,2df4aa2d7580639fa0fcdbcad5e2e969", + "genome.fasta.bwt:md5,815eded87e4cb6b0f1daab5c4d6e30af", + "genome.fasta.pac:md5,8569fbdb2c98c6fb16dfa73d8eacb070", + "genome.fasta.sa:md5,e7cff62b919448a3a3d0fe4aaf427594", + "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "test.recal.mosdepth.global.dist.txt:md5,45016e5a0b9afb065c56833ba66f3049", + 
"test.recal.mosdepth.region.dist.txt:md5,45016e5a0b9afb065c56833ba66f3049", + "test.recal.mosdepth.summary.txt:md5,3968d28e14fd99350ec75e919778ad0f", + "test.recal.per-base.bed.gz:md5,ab3fb06c65b28f774b22973085699d51", + "test.recal.per-base.bed.gz.csi:md5,bc588fff78a04f4c31e16768ddde204d", + "test.recal.regions.bed.gz:md5,912777bfe53139edf4948106611d3aeb", + "test.recal.regions.bed.gz.csi:md5,e24653e1e1cc9703eecaee9ebf3d2b13" + ], + [ + "test.bam:md5,9da56e1adc10f4fa65303441b8a5a328" + ], + "No CRAM files", + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-30T16:47:31.914017014" + }, + "-profile test --aligner parabricks --wes": { + "content": [ + 14, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MERGE_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "PARABRICKS_FQ2BAM": { + "parabricks": "4.6.0-1" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + 
"multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + 
"multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + 
"multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/parabricks", + "preprocessing/parabricks/test", + "preprocessing/parabricks/test-test_L1", + "preprocessing/parabricks/test-test_L1/test-test_L1.cram", + "preprocessing/parabricks/test-test_L1/test-test_L1.cram.crai", + "preprocessing/parabricks/test-test_L2", + "preprocessing/parabricks/test-test_L2/test-test_L2.cram", + "preprocessing/parabricks/test-test_L2/test-test_L2.cram.crai", + "preprocessing/parabricks/test/test.cram", + "preprocessing/parabricks/test/test.cram.crai", + "reference", + "reference/bwa", + "reference/bwa/genome.fasta.amb", + "reference/bwa/genome.fasta.ann", + "reference/bwa/genome.fasta.bwt", + "reference/bwa/genome.fasta.pac", + "reference/bwa/genome.fasta.sa", + "reference/intervals", + "reference/intervals/chr22_1-40001.bed", + "reference/intervals/chr22_1-40001.bed.gz", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz", + "reports", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + 
"reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test-test_L2", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.zip", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.zip", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats" + ], + [ + "fastqc-status-check-heatmap.txt:md5,a020b9689ddeb4abec16b4854fe452f1", + "fastqc_adapter_content_plot.txt:md5,2e1b72be741319e7fadbbb39d7e5b37d", + "fastqc_per_base_n_content_plot.txt:md5,ad3b971a6bb4e8ba6c844c8a03584eb8", + "fastqc_per_base_sequence_quality_plot.txt:md5,1bc03889d243a944253ac637d81ae10c", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,2c42d140ce06c08dad2b58f397c23239", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,59e22821d350bfb97c37ffd9088f5ad9", + "fastqc_per_sequence_quality_scores_plot.txt:md5,f33615cc98bb6225f39545a415fa7c0f", + "fastqc_sequence_counts_plot.txt:md5,7f0f19a58e8e54e792a751fd04a9ae13", + "fastqc_sequence_duplication_levels_plot.txt:md5,92b02e250ff78725deb9a10d510fcecc", + "fastqc_sequence_length_distribution_plot.txt:md5,fb04dce68ec566314125bc9438211b28", + "mosdepth-coverage-per-contig-single.txt:md5,f3a4ba86603c5121a26f54616a39c1ba", + "mosdepth-cumcoverage-dist-id.txt:md5,171f3912fd2b6c5e9c9602e5527bdff7", + 
"mosdepth_perchrom.txt:md5,f3a4ba86603c5121a26f54616a39c1ba", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "multiqc_fastqc.txt:md5,bde0d0bffa62228b33fb68b7e25b6ff8", + "samtools-stats-dp.txt:md5,068d671d8e3fa0a12b9c36072fb36808", + "samtools_alignment_plot.txt:md5,d2729501a111a33268c008ef6f18c7cd", + "genome.fasta.amb:md5,1891c1de381b3a96d4e72f590fde20c1", + "genome.fasta.ann:md5,2df4aa2d7580639fa0fcdbcad5e2e969", + "genome.fasta.bwt:md5,815eded87e4cb6b0f1daab5c4d6e30af", + "genome.fasta.pac:md5,8569fbdb2c98c6fb16dfa73d8eacb070", + "genome.fasta.sa:md5,e7cff62b919448a3a3d0fe4aaf427594", + "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "test.recal.mosdepth.global.dist.txt:md5,45016e5a0b9afb065c56833ba66f3049", + "test.recal.mosdepth.region.dist.txt:md5,45016e5a0b9afb065c56833ba66f3049", + "test.recal.mosdepth.summary.txt:md5,3968d28e14fd99350ec75e919778ad0f", + "test.recal.per-base.bed.gz:md5,ab3fb06c65b28f774b22973085699d51", + "test.recal.per-base.bed.gz.csi:md5,bc588fff78a04f4c31e16768ddde204d", + "test.recal.regions.bed.gz:md5,912777bfe53139edf4948106611d3aeb", + "test.recal.regions.bed.gz.csi:md5,e24653e1e1cc9703eecaee9ebf3d2b13" + ], + "No BAM files", + [ + "test-test_L1.cram:md5,4673edaace5f12d28b6904c40640d7bd", + "test-test_L2.cram:md5,4673edaace5f12d28b6904c40640d7bd", + "test.cram:md5,9da56e1adc10f4fa65303441b8a5a328" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-30T16:54:59.62879975" + }, + "-profile test --aligner parabricks --tools mutect2 --wes --input fastq_pair.csv": { + "content": [ + 26, + { + 
"BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "CALCULATECONTAMINATION": { + "gatk4": "4.6.1.0" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FILTERMUTECTCALLS": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GETPILEUPSUMMARIES_NORMAL": { + "gatk4": "4.6.1.0" + }, + "GETPILEUPSUMMARIES_TUMOR": { + "gatk4": "4.6.1.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "LEARNREADORIENTATIONMODEL": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "MUTECT2_PAIRED": { + "gatk4": "4.6.1.0" + }, + "PARABRICKS_FQ2BAM": { + "parabricks": "4.6.0-1" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/mapped.csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + 
"multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + 
"multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/parabricks", + "preprocessing/parabricks/test", + "preprocessing/parabricks/test/test.cram", + "preprocessing/parabricks/test/test.cram.crai", + "preprocessing/parabricks/test2", + "preprocessing/parabricks/test2/test2.cram", + "preprocessing/parabricks/test2/test2.cram.crai", + "reference", + "reference/bwa", + "reference/bwa/genome.fasta.amb", + "reference/bwa/genome.fasta.ann", + "reference/bwa/genome.fasta.bwt", + "reference/bwa/genome.fasta.pac", + "reference/bwa/genome.fasta.sa", + "reference/intervals", + "reference/intervals/chr22_1-40001.bed", + "reference/intervals/chr22_1-40001.bed.gz", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz", + "reports", + "reports/bcftools", + "reports/bcftools/mutect2", + "reports/bcftools/mutect2/test2_vs_test", + "reports/bcftools/mutect2/test2_vs_test/test2_vs_test.mutect2.filtered.bcftools_stats.txt", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + 
"reports/fastqc/test2-test_L1", + "reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.html", + "reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.zip", + "reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.html", + "reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.zip", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/mosdepth/test2", + "reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test2/test2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test2/test2.recal.mosdepth.summary.txt", + "reports/mosdepth/test2/test2.recal.per-base.bed.gz", + "reports/mosdepth/test2/test2.recal.per-base.bed.gz.csi", + "reports/mosdepth/test2/test2.recal.regions.bed.gz", + "reports/mosdepth/test2/test2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/samtools/test2", + "reports/samtools/test2/test2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/mutect2", + "reports/vcftools/mutect2/test2_vs_test", + "reports/vcftools/mutect2/test2_vs_test/test2_vs_test.mutect2.filtered.FILTER.summary", + "reports/vcftools/mutect2/test2_vs_test/test2_vs_test.mutect2.filtered.TsTv.count", + "reports/vcftools/mutect2/test2_vs_test/test2_vs_test.mutect2.filtered.TsTv.qual", + "variant_calling", + "variant_calling/mutect2", + "variant_calling/mutect2/test", + "variant_calling/mutect2/test/test.mutect2.pileups.table", + "variant_calling/mutect2/test2", + "variant_calling/mutect2/test2/test2.mutect2.pileups.table", + 
"variant_calling/mutect2/test2_vs_test", + "variant_calling/mutect2/test2_vs_test/test2_vs_test.mutect2.artifactprior.tar.gz", + "variant_calling/mutect2/test2_vs_test/test2_vs_test.mutect2.contamination.table", + "variant_calling/mutect2/test2_vs_test/test2_vs_test.mutect2.filtered.vcf.gz", + "variant_calling/mutect2/test2_vs_test/test2_vs_test.mutect2.filtered.vcf.gz.filteringStats.tsv", + "variant_calling/mutect2/test2_vs_test/test2_vs_test.mutect2.filtered.vcf.gz.tbi", + "variant_calling/mutect2/test2_vs_test/test2_vs_test.mutect2.segmentation.table", + "variant_calling/mutect2/test2_vs_test/test2_vs_test.mutect2.vcf.gz", + "variant_calling/mutect2/test2_vs_test/test2_vs_test.mutect2.vcf.gz.stats", + "variant_calling/mutect2/test2_vs_test/test2_vs_test.mutect2.vcf.gz.tbi" + ], + [ + "fastqc-status-check-heatmap.txt:md5,eeb4e7e7a45f4223c86bfe3aea81f90b", + "fastqc_adapter_content_plot.txt:md5,cc7a809f9f001c10646ee4199ccdb40f", + "fastqc_per_base_n_content_plot.txt:md5,1eba855ae0fa5b5ed4a1f90d1c97f759", + "fastqc_per_base_sequence_quality_plot.txt:md5,cbb2743dfb2ec74e72b578c83ec28ee8", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,73c884822eba0bafcdf34b90fe81aec5", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,24eeb00e5e2b11c7ab90a3223d429d15", + "fastqc_per_sequence_quality_scores_plot.txt:md5,6f048594f02effb93608665be29bd35a", + "fastqc_sequence_counts_plot.txt:md5,fca7ee9ef3382e2837a302d8c5d33769", + "fastqc_sequence_duplication_levels_plot.txt:md5,2aa0c6f33e4cffbb29cdabe2c28bb097", + "fastqc_sequence_length_distribution_plot.txt:md5,61b1fe978a2c73b86c30c27ee4bc60ae", + "mosdepth-coverage-per-contig-single.txt:md5,8973b4a63f05d9443da33ccd1fd48cb1", + "mosdepth-cumcoverage-dist-id.txt:md5,098acc5aee768b62136e92fddab58df1", + "mosdepth_perchrom.txt:md5,8973b4a63f05d9443da33ccd1fd48cb1", + "multiqc_citations.txt:md5,e49571d90d78a0d1bc919b82d7dc48c6", + "multiqc_fastqc.txt:md5,2fd25e8c81f962594b801d5a9df3cd87", + 
"samtools-stats-dp.txt:md5,a2f978ab572ed3c72e3af6e605708d59", + "samtools_alignment_plot.txt:md5,94d6398ce03eefb42aa6f106c8d36e89", + "genome.fasta.amb:md5,1891c1de381b3a96d4e72f590fde20c1", + "genome.fasta.ann:md5,2df4aa2d7580639fa0fcdbcad5e2e969", + "genome.fasta.bwt:md5,815eded87e4cb6b0f1daab5c4d6e30af", + "genome.fasta.pac:md5,8569fbdb2c98c6fb16dfa73d8eacb070", + "genome.fasta.sa:md5,e7cff62b919448a3a3d0fe4aaf427594", + "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "test2_vs_test.mutect2.filtered.bcftools_stats.txt:md5,46384bbd90894ff023684d88df891fd6", + "test.recal.mosdepth.global.dist.txt:md5,5a0679057c530e5945c9c5a3a17312dc", + "test.recal.mosdepth.region.dist.txt:md5,5a0679057c530e5945c9c5a3a17312dc", + "test.recal.mosdepth.summary.txt:md5,2ea3534987f28b3ed0b64a8e7986b442", + "test.recal.per-base.bed.gz:md5,e04122db11a66ad6ef7c851511b1b505", + "test.recal.per-base.bed.gz.csi:md5,a0c543664c7e5902d1cb56c25f4123b3", + "test.recal.regions.bed.gz:md5,339335cc6d0a8048a3e328eaa93c5160", + "test.recal.regions.bed.gz.csi:md5,a43bfb8c598e3091406b25ba174c0270", + "test2.recal.mosdepth.global.dist.txt:md5,f25166c3a0051bb4d8c11a210278de6c", + "test2.recal.mosdepth.region.dist.txt:md5,f25166c3a0051bb4d8c11a210278de6c", + "test2.recal.mosdepth.summary.txt:md5,325213ef7bbb89b9ae27ef59252eca49", + "test2.recal.per-base.bed.gz:md5,027f5d98da0e283df866f2d9ac8255db", + "test2.recal.per-base.bed.gz.csi:md5,c471e17d82fddaeca77167331dead581", + "test2.recal.regions.bed.gz:md5,7f23663b3565fb9546dfaa829dd68c61", + "test2.recal.regions.bed.gz.csi:md5,e24653e1e1cc9703eecaee9ebf3d2b13", + "test2_vs_test.mutect2.filtered.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "test2_vs_test.mutect2.filtered.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f", + 
"test.mutect2.pileups.table:md5,4a89cfa663f1552aeb80d5c31aa36e3d", + "test2.mutect2.pileups.table:md5,a4069de22dd8deaeea222115f2738713", + "test2_vs_test.mutect2.contamination.table:md5,9c5a4666d682b84be6bf5ecf4305b15c", + "test2_vs_test.mutect2.filtered.vcf.gz.filteringStats.tsv:md5,98e1b87a52999eb8f429ef4a7877eb3f", + "test2_vs_test.mutect2.segmentation.table:md5,abed02f7a49bca992eb4e6f0006df85b", + "test2_vs_test.mutect2.vcf.gz.stats:md5,cfec9ffc08ea78f14706db59e38dc85b" + ], + "No BAM files", + [ + "test.cram:md5,4673edaace5f12d28b6904c40640d7bd", + "test2.cram:md5,bde66820387c9edcff38a2dfbcb8339e" + ], + [ + "test2_vs_test.mutect2.filtered.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "test2_vs_test.mutect2.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998513#1/1", + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1", + "WARN: No Panel-of-normal was specified for Mutect2." 
+ ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-30T13:32:40.97607962" + }, + "-profile test --aligner parabricks --wes --intervals": { + "content": [ + 13, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MERGE_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "PARABRICKS_FQ2BAM": { + "parabricks": "4.6.0-1" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + 
"multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + 
"multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + 
"multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/parabricks", + "preprocessing/parabricks/test", + "preprocessing/parabricks/test-test_L1", + "preprocessing/parabricks/test-test_L1/test-test_L1.cram", + "preprocessing/parabricks/test-test_L1/test-test_L1.cram.crai", + "preprocessing/parabricks/test-test_L2", + "preprocessing/parabricks/test-test_L2/test-test_L2.cram", + "preprocessing/parabricks/test-test_L2/test-test_L2.cram.crai", + "preprocessing/parabricks/test/test.cram", + "preprocessing/parabricks/test/test.cram.crai", + "reference", + "reference/bwa", + "reference/bwa/genome.fasta.amb", + "reference/bwa/genome.fasta.ann", + "reference/bwa/genome.fasta.bwt", + "reference/bwa/genome.fasta.pac", + "reference/bwa/genome.fasta.sa", + "reference/intervals", + "reference/intervals/chr22_2-15000.bed", + "reference/intervals/chr22_2-15000.bed.gz", + "reference/intervals/genome.multi_intervals.bed.gz", + "reports", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test-test_L2", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.zip", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.zip", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + 
"reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats" + ], + [ + "fastqc-status-check-heatmap.txt:md5,a020b9689ddeb4abec16b4854fe452f1", + "fastqc_adapter_content_plot.txt:md5,2e1b72be741319e7fadbbb39d7e5b37d", + "fastqc_per_base_n_content_plot.txt:md5,ad3b971a6bb4e8ba6c844c8a03584eb8", + "fastqc_per_base_sequence_quality_plot.txt:md5,1bc03889d243a944253ac637d81ae10c", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,2c42d140ce06c08dad2b58f397c23239", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,59e22821d350bfb97c37ffd9088f5ad9", + "fastqc_per_sequence_quality_scores_plot.txt:md5,f33615cc98bb6225f39545a415fa7c0f", + "fastqc_sequence_counts_plot.txt:md5,7f0f19a58e8e54e792a751fd04a9ae13", + "fastqc_sequence_duplication_levels_plot.txt:md5,92b02e250ff78725deb9a10d510fcecc", + "fastqc_sequence_length_distribution_plot.txt:md5,fb04dce68ec566314125bc9438211b28", + "mosdepth-coverage-per-contig-single.txt:md5,63cfbfc1ea970a26a7f0470efd9ad692", + "mosdepth-cumcoverage-dist-id.txt:md5,4a65b87249d6cbff198986e795ad588d", + "mosdepth_perchrom.txt:md5,63cfbfc1ea970a26a7f0470efd9ad692", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "multiqc_fastqc.txt:md5,bde0d0bffa62228b33fb68b7e25b6ff8", + "samtools-stats-dp.txt:md5,068d671d8e3fa0a12b9c36072fb36808", + "samtools_alignment_plot.txt:md5,d2729501a111a33268c008ef6f18c7cd", + "genome.fasta.amb:md5,1891c1de381b3a96d4e72f590fde20c1", + "genome.fasta.ann:md5,2df4aa2d7580639fa0fcdbcad5e2e969", + "genome.fasta.bwt:md5,815eded87e4cb6b0f1daab5c4d6e30af", + "genome.fasta.pac:md5,8569fbdb2c98c6fb16dfa73d8eacb070", + "genome.fasta.sa:md5,e7cff62b919448a3a3d0fe4aaf427594", + "chr22_2-15000.bed:md5,3cb9e9a26427faaa23b59b2ebcf55f4d", + "chr22_2-15000.bed.gz:md5,3cb9e9a26427faaa23b59b2ebcf55f4d", + 
"genome.multi_intervals.bed.gz:md5,3cb9e9a26427faaa23b59b2ebcf55f4d", + "test.recal.mosdepth.global.dist.txt:md5,45016e5a0b9afb065c56833ba66f3049", + "test.recal.mosdepth.region.dist.txt:md5,a9e42dc9d21073821858d40ffce5bcde", + "test.recal.mosdepth.summary.txt:md5,ac93e4aa5a5229d001404513816cc219", + "test.recal.per-base.bed.gz:md5,ab3fb06c65b28f774b22973085699d51", + "test.recal.per-base.bed.gz.csi:md5,bc588fff78a04f4c31e16768ddde204d", + "test.recal.regions.bed.gz:md5,326c62bcdb340f213cc4514213516415", + "test.recal.regions.bed.gz.csi:md5,5c00a1d457c387d6e71848a6d897e309" + ], + "No BAM files", + [ + "test-test_L1.cram:md5,4673edaace5f12d28b6904c40640d7bd", + "test-test_L2.cram:md5,4673edaace5f12d28b6904c40640d7bd", + "test.cram:md5,9da56e1adc10f4fa65303441b8a5a328" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-30T16:51:33.339994069" + } +} diff --git a/tests/alignment_from_everything.nf.test b/tests/alignment_from_everything.nf.test new file mode 100644 index 0000000000..af8dc2effc --- /dev/null +++ b/tests/alignment_from_everything.nf.test @@ -0,0 +1,25 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --input bam_and_fastq_and_spring.csv --save_mapped --save_output_as_bam", + params: [ + input: "${projectDir}/tests/csv/3.0/bam_and_fastq_and_spring.csv", + tools: '', + save_mapped: true, + save_output_as_bam: true + ], + snapshot_ignore: 'Cannot extract flowcell ID' + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/alignment_from_everything.nf.test.snap b/tests/alignment_from_everything.nf.test.snap 
new file mode 100644 index 0000000000..95be7f7f71 --- /dev/null +++ b/tests/alignment_from_everything.nf.test.snap @@ -0,0 +1,395 @@ +{ + "-profile test --input bam_and_fastq_and_spring.csv --save_mapped --save_output_as_bam": { + "content": [ + 53, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CAT_FASTQ": { + "cat": 9.5 + }, + "COLLATE_FASTQ_MAP": { + "samtools": 1.21 + }, + "COLLATE_FASTQ_UNMAP": { + "samtools": 1.21 + }, + "CRAM_TO_BAM": { + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_MERGE_UNMAP": { + "samtools": 1.21 + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_MAP_MAP": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_MAP_UNMAP": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_UNMAP_MAP": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_UNMAP_UNMAP": { + "samtools": 1.21 + }, + "SPRING_DECOMPRESS_TO_FQ_PAIR": { + "spring": "1.1.1" + }, + "SPRING_DECOMPRESS_TO_R1_FQ": { + "spring": "1.1.1" + }, + "SPRING_DECOMPRESS_TO_R2_FQ": { + "spring": "1.1.1" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_overrepresented_sequences_plot.txt", + 
"multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + 
"multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_overrepresented_sequences_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_top_overrepresented_sequences_table.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_overrepresented_sequences_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/fastqc_top_overrepresented_sequences_table.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + 
"multiqc/multiqc_plots/svg/fastqc_overrepresented_sequences_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_top_overrepresented_sequences_table.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.bam", + "preprocessing/mapped/test/test.sorted.bam.bai", + "preprocessing/mapped/test2", + "preprocessing/mapped/test2/test2.sorted.bam", + 
"preprocessing/mapped/test2/test2.sorted.bam.bai", + "preprocessing/mapped/test3", + "preprocessing/mapped/test3/test3.sorted.bam", + "preprocessing/mapped/test3/test3.sorted.bam.bai", + "preprocessing/mapped/test_bam", + "preprocessing/mapped/test_bam/test_bam.sorted.bam", + "preprocessing/mapped/test_bam/test_bam.sorted.bam.bai", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.bam", + "preprocessing/markduplicates/test/test.md.bam.bai", + "preprocessing/markduplicates/test2", + "preprocessing/markduplicates/test2/test2.md.bam", + "preprocessing/markduplicates/test2/test2.md.bam.bai", + "preprocessing/markduplicates/test3", + "preprocessing/markduplicates/test3/test3.md.bam", + "preprocessing/markduplicates/test3/test3.md.bam.bai", + "preprocessing/markduplicates/test_bam", + "preprocessing/markduplicates/test_bam/test_bam.md.bam", + "preprocessing/markduplicates/test_bam/test_bam.md.bam.bai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recal_table/test2", + "preprocessing/recal_table/test2/test2.recal.table", + "preprocessing/recal_table/test3", + "preprocessing/recal_table/test3/test3.recal.table", + "preprocessing/recal_table/test_bam", + "preprocessing/recal_table/test_bam/test_bam.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.bam", + "preprocessing/recalibrated/test/test.recal.bam.bai", + "preprocessing/recalibrated/test2", + "preprocessing/recalibrated/test2/test2.recal.bam", + "preprocessing/recalibrated/test2/test2.recal.bam.bai", + "preprocessing/recalibrated/test3", + "preprocessing/recalibrated/test3/test3.recal.bam", + "preprocessing/recalibrated/test3/test3.recal.bam.bai", + "preprocessing/recalibrated/test_bam", + "preprocessing/recalibrated/test_bam/test_bam.recal.bam", + 
"preprocessing/recalibrated/test_bam/test_bam.recal.bam.bai", + "reference", + "reports", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test2-test2_L1", + "reports/fastqc/test2-test2_L1/test2-test2_L1_1_fastqc.html", + "reports/fastqc/test2-test2_L1/test2-test2_L1_1_fastqc.zip", + "reports/fastqc/test2-test2_L1/test2-test2_L1_2_fastqc.html", + "reports/fastqc/test2-test2_L1/test2-test2_L1_2_fastqc.zip", + "reports/fastqc/test3-test3_L1", + "reports/fastqc/test3-test3_L1/test3-test3_L1_1_fastqc.html", + "reports/fastqc/test3-test3_L1/test3-test3_L1_1_fastqc.zip", + "reports/fastqc/test3-test3_L1/test3-test3_L1_2_fastqc.html", + "reports/fastqc/test3-test3_L1/test3-test3_L1_2_fastqc.zip", + "reports/fastqc/test_bam-test_bam_L1", + "reports/fastqc/test_bam-test_bam_L1/test_bam-test_bam_L1_1_fastqc.html", + "reports/fastqc/test_bam-test_bam_L1/test_bam-test_bam_L1_1_fastqc.zip", + "reports/fastqc/test_bam-test_bam_L1/test_bam-test_bam_L1_2_fastqc.html", + "reports/fastqc/test_bam-test_bam_L1/test_bam-test_bam_L1_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/markduplicates/test2", + "reports/markduplicates/test2/test2.md.cram.metrics", + "reports/markduplicates/test3", + "reports/markduplicates/test3/test3.md.cram.metrics", + "reports/markduplicates/test_bam", + "reports/markduplicates/test_bam/test_bam.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + 
"reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test2", + "reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt", + "reports/mosdepth/test2/test2.md.mosdepth.region.dist.txt", + "reports/mosdepth/test2/test2.md.mosdepth.summary.txt", + "reports/mosdepth/test2/test2.md.regions.bed.gz", + "reports/mosdepth/test2/test2.md.regions.bed.gz.csi", + "reports/mosdepth/test3", + "reports/mosdepth/test3/test3.md.mosdepth.global.dist.txt", + "reports/mosdepth/test3/test3.md.mosdepth.region.dist.txt", + "reports/mosdepth/test3/test3.md.mosdepth.summary.txt", + "reports/mosdepth/test3/test3.md.regions.bed.gz", + "reports/mosdepth/test3/test3.md.regions.bed.gz.csi", + "reports/mosdepth/test_bam", + "reports/mosdepth/test_bam/test_bam.md.mosdepth.global.dist.txt", + "reports/mosdepth/test_bam/test_bam.md.mosdepth.region.dist.txt", + "reports/mosdepth/test_bam/test_bam.md.mosdepth.summary.txt", + "reports/mosdepth/test_bam/test_bam.md.regions.bed.gz", + "reports/mosdepth/test_bam/test_bam.md.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test2", + "reports/samtools/test2/test2.md.cram.stats", + "reports/samtools/test3", + "reports/samtools/test3/test3.md.cram.stats", + "reports/samtools/test_bam", + "reports/samtools/test_bam/test_bam.md.cram.stats" + ], + [ + "fastqc-status-check-heatmap.txt:md5,de62b83776610478e5d6a3adbc6a9bbc", + "fastqc_adapter_content_plot.txt:md5,eafed69132feeb2315e840c2d2ebd7ff", + "fastqc_overrepresented_sequences_plot.txt:md5,36b5440af4638da42bbbd0182c3fd647", + "fastqc_per_base_n_content_plot.txt:md5,7e0f8b689096c9ef19aefdf26fd28356", + "fastqc_per_base_sequence_quality_plot.txt:md5,6ebb5afb5d57437615b3f5c75124293b", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,b5da0f4c58b82ac5da26ac557aba229a", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,9f9788180596f147ec39e55c784899d1", + 
"fastqc_per_sequence_quality_scores_plot.txt:md5,5b1a8502b3e1e294a0499b88de2966d9", + "fastqc_sequence_counts_plot.txt:md5,674147ee973b56cff1a8bed894d18d3f", + "fastqc_sequence_duplication_levels_plot.txt:md5,5dbfcf6fcbbd054e17fb460985b078c2", + "fastqc_sequence_length_distribution_plot.txt:md5,893426c7b64f5fe3279733eb63c6abf9", + "mosdepth-coverage-per-contig-single.txt:md5,7c01ea3a011867911ed0b612acb42007", + "mosdepth-cumcoverage-dist-id.txt:md5,c013414788f3acfff722a725b5c4938e", + "mosdepth_perchrom.txt:md5,7c01ea3a011867911ed0b612acb42007", + "multiqc_citations.txt:md5,7d0b4b866fa577272c48a1f3ad72e75d", + "multiqc_fastqc.txt:md5,ae908328010c1f8a5eb15ca080b92993", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,1dc593231b888b9f832e61dd71e0161b", + "samtools_alignment_plot.txt:md5,35a1db2ce8dbe1b59576cc8edb57095e", + "test.md.mosdepth.global.dist.txt:md5,76fa71922a3f748e507c2364c531dfcb", + "test.md.mosdepth.region.dist.txt:md5,abc5df85e302b79985627888870882da", + "test.md.mosdepth.summary.txt:md5,d536456436eb275159b8c6af83213d80", + "test.md.regions.bed.gz:md5,b25a2798061021c0b2f4e1d18219bbbd", + "test.md.regions.bed.gz.csi:md5,b1c2a861f64e20a94108a6de3b76c582", + "test2.md.mosdepth.global.dist.txt:md5,76fa71922a3f748e507c2364c531dfcb", + "test2.md.mosdepth.region.dist.txt:md5,abc5df85e302b79985627888870882da", + "test2.md.mosdepth.summary.txt:md5,d536456436eb275159b8c6af83213d80", + "test2.md.regions.bed.gz:md5,b25a2798061021c0b2f4e1d18219bbbd", + "test2.md.regions.bed.gz.csi:md5,b1c2a861f64e20a94108a6de3b76c582", + "test3.md.mosdepth.global.dist.txt:md5,76fa71922a3f748e507c2364c531dfcb", + 
"test3.md.mosdepth.region.dist.txt:md5,abc5df85e302b79985627888870882da", + "test3.md.mosdepth.summary.txt:md5,d536456436eb275159b8c6af83213d80", + "test3.md.regions.bed.gz:md5,b25a2798061021c0b2f4e1d18219bbbd", + "test3.md.regions.bed.gz.csi:md5,b1c2a861f64e20a94108a6de3b76c582", + "test_bam.md.mosdepth.global.dist.txt:md5,9cb9b181119256ed17a77dcf44d58285", + "test_bam.md.mosdepth.region.dist.txt:md5,75e1ce7e55af51f4985fa91654a5ea2d", + "test_bam.md.mosdepth.summary.txt:md5,dbe376360e437c89190139ef0ae6769a", + "test_bam.md.regions.bed.gz:md5,0e5ed846b9b11717e42e93eec60f4ffc", + "test_bam.md.regions.bed.gz.csi:md5,d0713716f63ac573f4a3385733e9a537" + ], + [ + "test.sorted.bam:md5,59ecc5c82c7af1283eea7507c590c831", + "test2.sorted.bam:md5,59ecc5c82c7af1283eea7507c590c831", + "test3.sorted.bam:md5,59ecc5c82c7af1283eea7507c590c831", + "test_bam.sorted.bam:md5,6934a96fff1eaa2f70b31ab7e11d3598", + "test.md.bam:md5,59ecc5c82c7af1283eea7507c590c831", + "test2.md.bam:md5,59ecc5c82c7af1283eea7507c590c831", + "test3.md.bam:md5,59ecc5c82c7af1283eea7507c590c831", + "test_bam.md.bam:md5,6934a96fff1eaa2f70b31ab7e11d3598", + "test.recal.bam:md5,654909615a48db30bdc14ec4d9d7d17c", + "test2.recal.bam:md5,654909615a48db30bdc14ec4d9d7d17c", + "test3.recal.bam:md5,654909615a48db30bdc14ec4d9d7d17c", + "test_bam.recal.bam:md5,95a95d43e1db35c58b311dee4338c05b" + ], + "No CRAM files", + "No VCF files", + "No warnings" + ], + "timestamp": "2026-04-09T13:11:35.435247726", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/tests/alignment_to_fastq.nf.test b/tests/alignment_to_fastq.nf.test new file mode 100644 index 0000000000..19ffcbe9f8 --- /dev/null +++ b/tests/alignment_to_fastq.nf.test @@ -0,0 +1,24 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --input bam_for_remapping.csv --save_mapped 
--save_output_as_bam", + params: [ + input: "${projectDir}/tests/csv/3.0/bam_for_remapping.csv", + tools: '', + save_mapped: true, + save_output_as_bam: true + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/alignment_to_fastq.nf.test.snap b/tests/alignment_to_fastq.nf.test.snap new file mode 100644 index 0000000000..945d5246c2 --- /dev/null +++ b/tests/alignment_to_fastq.nf.test.snap @@ -0,0 +1,279 @@ +{ + "-profile test --input bam_for_remapping.csv --save_mapped --save_output_as_bam": { + "content": [ + 23, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CAT_FASTQ": { + "cat": 9.5 + }, + "COLLATE_FASTQ_MAP": { + "samtools": 1.21 + }, + "COLLATE_FASTQ_UNMAP": { + "samtools": 1.21 + }, + "CRAM_TO_BAM": { + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_MERGE_UNMAP": { + "samtools": 1.21 + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_MAP_MAP": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_MAP_UNMAP": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_UNMAP_MAP": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_UNMAP_UNMAP": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "multiqc", + "multiqc/multiqc_data", + 
"multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_overrepresented_sequences_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + 
"multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_overrepresented_sequences_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_top_overrepresented_sequences_table.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_overrepresented_sequences_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/fastqc_top_overrepresented_sequences_table.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_overrepresented_sequences_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", 
+ "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_top_overrepresented_sequences_table.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.bam", + "preprocessing/mapped/test/test.sorted.bam.bai", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.bam", + "preprocessing/markduplicates/test/test.md.bam.bai", + "preprocessing/recal_table", + 
"preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.bam", + "preprocessing/recalibrated/test/test.recal.bam.bai", + "reference", + "reports", + "reports/fastqc", + "reports/fastqc/test-1", + "reports/fastqc/test-1/test-1_1_fastqc.html", + "reports/fastqc/test-1/test-1_1_fastqc.zip", + "reports/fastqc/test-1/test-1_2_fastqc.html", + "reports/fastqc/test-1/test-1_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats" + ], + [ + "fastqc-status-check-heatmap.txt:md5,0c9e61033b0669f20e27dce8f4b3b25c", + "fastqc_overrepresented_sequences_plot.txt:md5,204591ae3929c2d5c7af72d3012c1675", + "fastqc_per_base_n_content_plot.txt:md5,bc7b5536aa1867577c34defe2fc2af3f", + "fastqc_per_base_sequence_quality_plot.txt:md5,2c16d19197f73195d3369744fb281189", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,71ce73a79821c103d82139250280c146", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,144b778df9c67bb144b32193544777fb", + "fastqc_per_sequence_quality_scores_plot.txt:md5,735220d7946ff36b933daf1de284747b", + "fastqc_sequence_counts_plot.txt:md5,bcd6fbad2fcbfcd6e78f7392945471cb", + "fastqc_sequence_duplication_levels_plot.txt:md5,65c8986b4ea2983999508c5d8fce0df5", + "fastqc_sequence_length_distribution_plot.txt:md5,8f2a67d6bf43abce206aa77c2959b420", + "mosdepth-coverage-per-contig-single.txt:md5,76d816c3f71faf2009c8a6f88092a2f3", + 
"mosdepth-cumcoverage-dist-id.txt:md5,3af8f7d8ed7d1fdff6118e0098258192", + "mosdepth_perchrom.txt:md5,76d816c3f71faf2009c8a6f88092a2f3", + "multiqc_citations.txt:md5,7d0b4b866fa577272c48a1f3ad72e75d", + "multiqc_fastqc.txt:md5,6fa90a9d9daa7d4839e4b7ebfcdc6201", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,ef82cfdd058c212c0bba38ceb32ec2f8", + "samtools_alignment_plot.txt:md5,22572fcd0791878ed37ae2f48213cee2", + "test.md.mosdepth.global.dist.txt:md5,9cb9b181119256ed17a77dcf44d58285", + "test.md.mosdepth.region.dist.txt:md5,75e1ce7e55af51f4985fa91654a5ea2d", + "test.md.mosdepth.summary.txt:md5,dbe376360e437c89190139ef0ae6769a", + "test.md.regions.bed.gz:md5,0e5ed846b9b11717e42e93eec60f4ffc", + "test.md.regions.bed.gz.csi:md5,d0713716f63ac573f4a3385733e9a537" + ], + [ + "test.sorted.bam:md5,6934a96fff1eaa2f70b31ab7e11d3598", + "test.md.bam:md5,6934a96fff1eaa2f70b31ab7e11d3598", + "test.recal.bam:md5,95a95d43e1db35c58b311dee4338c05b" + ], + "No CRAM files", + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T20:13:07.668460853" + } +} diff --git a/tests/annotation_bcfann.nf.test b/tests/annotation_bcfann.nf.test new file mode 100644 index 0000000000..17a7377137 --- /dev/null +++ b/tests/annotation_bcfann.nf.test @@ -0,0 +1,32 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools bcfann", + params: [ + input: "${projectDir}/tests/csv/3.0/vcf_single.csv", + step: 'annotate', + tools: 'bcfann' + ] + ], + [ + name: "-profile test --tools bcfann 
--bcftools_columns", + params: [ + input: "${projectDir}/tests/csv/3.0/vcf_single.csv", + bcftools_columns: "${projectDir}/tests/config/bcfann_test_columns.txt", + step: 'annotate', + tools: 'bcfann' + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/annotation_bcfann.nf.test.snap b/tests/annotation_bcfann.nf.test.snap new file mode 100644 index 0000000000..45585928f9 --- /dev/null +++ b/tests/annotation_bcfann.nf.test.snap @@ -0,0 +1,88 @@ +{ + "-profile test --tools bcfann": { + "content": [ + 2, + { + "BCFTOOLS_ANNOTATE": { + "bcftools": 1.21 + } + }, + [ + "annotation", + "annotation/test", + "annotation/test/test_BCF.ann.vcf.gz", + "annotation/test/test_BCF.ann.vcf.gz.tbi", + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml" + ], + [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" + ], + "No BAM files", + "No CRAM files", + [ + "test_BCF.ann.vcf.gz:md5,bc7bf3ee9e8430e064c539eb81e59bf9" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-08-14T09:55:08.446532066" + }, + "-profile test --tools bcfann --bcftools_columns": { + "content": [ + 2, + { + "BCFTOOLS_ANNOTATE": { + "bcftools": 1.21 + } + }, + [ + "annotation", + "annotation/test", + "annotation/test/test_BCF.ann.vcf.gz", + "annotation/test/test_BCF.ann.vcf.gz.tbi", + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + 
"multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml" + ], + [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" + ], + "No BAM files", + "No CRAM files", + [ + "test_BCF.ann.vcf.gz:md5,bc7bf3ee9e8430e064c539eb81e59bf9" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-10-01T09:52:49.578328635" + } +} \ No newline at end of file diff --git a/tests/annotation_merge.nf.test b/tests/annotation_merge.nf.test new file mode 100644 index 0000000000..bddf361106 --- /dev/null +++ b/tests/annotation_merge.nf.test @@ -0,0 +1,40 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools merge", + params: [ + input: "${projectDir}/tests/csv/3.0/vcf_single.csv", + step: 'annotate', + tools: 'merge' + ] + ], + [ + name: "-profile test --tools merge,snpeff,vep", + params: [ + input: "${projectDir}/tests/csv/3.0/vcf_single.csv", + step: 'annotate', + tools: 'merge,snpeff,vep' + ] + ], + [ + name: "-profile test --tools merge,snpsift", + params: [ + input: "${projectDir}/tests/csv/3.0/vcf_single.csv", + step: 'annotate', + tools: 'merge,snpsift', + snpsift_databases: "${projectDir}/tests/config/snpsift_test_databases.csv" + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/annotation_merge.nf.test.snap b/tests/annotation_merge.nf.test.snap new file mode 100644 index 0000000000..1c18f68ad8 --- /dev/null +++ b/tests/annotation_merge.nf.test.snap @@ -0,0 +1,326 @@ +{ + "-profile test --tools 
merge,snpeff,vep": { + "content": [ + 5, + { + "ENSEMBLVEP_VEP": { + "ensemblvep": "115.2", + "perl-math-cdf": "0.1", + "tabix": "1.21" + }, + "SNPEFF_SNPEFF": { + "snpeff": "5.4a" + }, + "TABIX_BGZIPTABIX": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCF_ANNOTATE_MERGE": { + "ensemblvep": "115.2", + "perl-math-cdf": "0.1", + "tabix": "1.21" + } + }, + [ + "annotation", + "annotation/test", + "annotation/test/test_VEP.ann.vcf.gz", + "annotation/test/test_VEP.ann.vcf.gz.tbi", + "annotation/test/test_snpEff.ann.vcf.gz", + "annotation/test/test_snpEff.ann.vcf.gz.tbi", + "annotation/test/test_snpEff_VEP.ann.vcf.gz", + "annotation/test/test_snpEff_VEP.ann.vcf.gz.tbi", + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_snpeff.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/snpeff_effects.txt", + "multiqc/multiqc_data/snpeff_qualities.txt", + "multiqc/multiqc_data/snpeff_variant_effects_region.txt", + "multiqc/multiqc_data/vep-general-stats.txt", + "multiqc/multiqc_data/vep.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/snpeff_effects-cnt.pdf", + "multiqc/multiqc_plots/pdf/snpeff_effects-pct.pdf", + "multiqc/multiqc_plots/pdf/snpeff_qualities.pdf", + "multiqc/multiqc_plots/pdf/snpeff_variant_effects_region-cnt.pdf", + "multiqc/multiqc_plots/pdf/snpeff_variant_effects_region-log.pdf", + "multiqc/multiqc_plots/pdf/snpeff_variant_effects_region-pct-log.pdf", + "multiqc/multiqc_plots/pdf/snpeff_variant_effects_region-pct.pdf", + "multiqc/multiqc_plots/pdf/vep-general-stats.pdf", + "multiqc/multiqc_plots/png", + 
"multiqc/multiqc_plots/png/snpeff_effects-cnt.png", + "multiqc/multiqc_plots/png/snpeff_effects-pct.png", + "multiqc/multiqc_plots/png/snpeff_qualities.png", + "multiqc/multiqc_plots/png/snpeff_variant_effects_region-cnt.png", + "multiqc/multiqc_plots/png/snpeff_variant_effects_region-log.png", + "multiqc/multiqc_plots/png/snpeff_variant_effects_region-pct-log.png", + "multiqc/multiqc_plots/png/snpeff_variant_effects_region-pct.png", + "multiqc/multiqc_plots/png/vep-general-stats.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/snpeff_effects-cnt.svg", + "multiqc/multiqc_plots/svg/snpeff_effects-pct.svg", + "multiqc/multiqc_plots/svg/snpeff_qualities.svg", + "multiqc/multiqc_plots/svg/snpeff_variant_effects_region-cnt.svg", + "multiqc/multiqc_plots/svg/snpeff_variant_effects_region-log.svg", + "multiqc/multiqc_plots/svg/snpeff_variant_effects_region-pct-log.svg", + "multiqc/multiqc_plots/svg/snpeff_variant_effects_region-pct.svg", + "multiqc/multiqc_plots/svg/vep-general-stats.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reports", + "reports/EnsemblVEP", + "reports/EnsemblVEP/test", + "reports/EnsemblVEP/test/test_VEP.ann.summary.html", + "reports/EnsemblVEP/test/test_snpEff_VEP.ann.summary.html", + "reports/snpeff", + "reports/snpeff/test", + "reports/snpeff/test/snpEff_summary.html", + "reports/snpeff/test/test_snpEff.csv", + "reports/snpeff/test/test_snpEff.genes.txt" + ], + [ + "multiqc_citations.txt:md5,ebf9f49bc020eeb38546ddab3a98171e", + "multiqc_snpeff.txt:md5,03a2b1c461cb6e5cccac64033a2f6526", + "snpeff_effects.txt:md5,3c5e9a1c191b77c781dc4d033b1dd1f7", + "snpeff_qualities.txt:md5,4c059b4e8bf0a64940ad1d6e30efd3a6", + "snpeff_variant_effects_region.txt:md5,05efd324edadced17ba3cd2b7714af57", + "vep-general-stats.txt:md5,71c994ae4221384f4e22459723d29cd0", + "vep.txt:md5,20570f3e4e51407b860a31d7e1d59de0", + "test_snpEff.genes.txt:md5,130536bf0237d7f3f746d32aaa32840a" + 
], + "No BAM files", + "No CRAM files", + [ + "test_VEP.ann.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_snpEff.ann.vcf.gz:md5,fd266e623365c7d1837bc9eb088fe70c", + "test_snpEff_VEP.ann.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-07T08:54:13.050728" + }, + "-profile test --tools merge,snpsift": { + "content": [ + 6, + { + "SNPEFF_SNPEFF": { + "snpeff": "5.4a" + }, + "SNPSIFT_ANNMEM": { + "snpsift": "5.4a" + }, + "SNPSIFT_ANNMEMCREATE": { + "snpsift": "5.4a" + }, + "TABIX_BGZIPTABIX": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCF_ANNOTATE_MERGE": { + "ensemblvep": "115.2", + "perl-math-cdf": "0.1", + "tabix": "1.21" + } + }, + [ + "annotation", + "annotation/test", + "annotation/test/test_snpEff_VEP.ann.vcf.gz", + "annotation/test/test_snpEff_VEP.ann.vcf.gz.tbi", + "annotation/test/test_snpEff_VEP_snpSift.ann.vcf.gz", + "annotation/test/test_snpEff_VEP_snpSift.ann.vcf.gz.tbi", + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_snpeff.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/snpeff_effects.txt", + "multiqc/multiqc_data/snpeff_qualities.txt", + "multiqc/multiqc_data/snpeff_variant_effects_region.txt", + "multiqc/multiqc_data/vep-general-stats.txt", + "multiqc/multiqc_data/vep.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/snpeff_effects-cnt.pdf", + "multiqc/multiqc_plots/pdf/snpeff_effects-pct.pdf", + "multiqc/multiqc_plots/pdf/snpeff_qualities.pdf", + "multiqc/multiqc_plots/pdf/snpeff_variant_effects_region-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/snpeff_variant_effects_region-log.pdf", + "multiqc/multiqc_plots/pdf/snpeff_variant_effects_region-pct-log.pdf", + "multiqc/multiqc_plots/pdf/snpeff_variant_effects_region-pct.pdf", + "multiqc/multiqc_plots/pdf/vep-general-stats.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/snpeff_effects-cnt.png", + "multiqc/multiqc_plots/png/snpeff_effects-pct.png", + "multiqc/multiqc_plots/png/snpeff_qualities.png", + "multiqc/multiqc_plots/png/snpeff_variant_effects_region-cnt.png", + "multiqc/multiqc_plots/png/snpeff_variant_effects_region-log.png", + "multiqc/multiqc_plots/png/snpeff_variant_effects_region-pct-log.png", + "multiqc/multiqc_plots/png/snpeff_variant_effects_region-pct.png", + "multiqc/multiqc_plots/png/vep-general-stats.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/snpeff_effects-cnt.svg", + "multiqc/multiqc_plots/svg/snpeff_effects-pct.svg", + "multiqc/multiqc_plots/svg/snpeff_qualities.svg", + "multiqc/multiqc_plots/svg/snpeff_variant_effects_region-cnt.svg", + "multiqc/multiqc_plots/svg/snpeff_variant_effects_region-log.svg", + "multiqc/multiqc_plots/svg/snpeff_variant_effects_region-pct-log.svg", + "multiqc/multiqc_plots/svg/snpeff_variant_effects_region-pct.svg", + "multiqc/multiqc_plots/svg/vep-general-stats.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/snpsift", + "reports", + "reports/EnsemblVEP", + "reports/EnsemblVEP/test", + "reports/EnsemblVEP/test/test_snpEff_VEP.ann.summary.html", + "reports/snpeff", + "reports/snpeff/test" + ], + [ + "multiqc_citations.txt:md5,ebf9f49bc020eeb38546ddab3a98171e", + "multiqc_snpeff.txt:md5,03a2b1c461cb6e5cccac64033a2f6526", + "snpeff_effects.txt:md5,3c5e9a1c191b77c781dc4d033b1dd1f7", + "snpeff_qualities.txt:md5,4c059b4e8bf0a64940ad1d6e30efd3a6", + "snpeff_variant_effects_region.txt:md5,05efd324edadced17ba3cd2b7714af57", + 
"vep-general-stats.txt:md5,57563be109a57f6edfa427b2b2c310ba", + "vep.txt:md5,bf54f689bb0ccab5e1566e48373f768c" + ], + "No BAM files", + "No CRAM files", + [ + "test_snpEff_VEP.ann.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_snpEff_VEP_snpSift.ann.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T15:02:33.282539192" + }, + "-profile test --tools merge": { + "content": [ + 4, + { + "SNPEFF_SNPEFF": { + "snpeff": "5.4a" + }, + "TABIX_BGZIPTABIX": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCF_ANNOTATE_MERGE": { + "ensemblvep": "115.2", + "perl-math-cdf": "0.1", + "tabix": "1.21" + } + }, + [ + "annotation", + "annotation/test", + "annotation/test/test_snpEff_VEP.ann.vcf.gz", + "annotation/test/test_snpEff_VEP.ann.vcf.gz.tbi", + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_snpeff.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/snpeff_effects.txt", + "multiqc/multiqc_data/snpeff_qualities.txt", + "multiqc/multiqc_data/snpeff_variant_effects_region.txt", + "multiqc/multiqc_data/vep-general-stats.txt", + "multiqc/multiqc_data/vep.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/snpeff_effects-cnt.pdf", + "multiqc/multiqc_plots/pdf/snpeff_effects-pct.pdf", + "multiqc/multiqc_plots/pdf/snpeff_qualities.pdf", + "multiqc/multiqc_plots/pdf/snpeff_variant_effects_region-cnt.pdf", + "multiqc/multiqc_plots/pdf/snpeff_variant_effects_region-log.pdf", + "multiqc/multiqc_plots/pdf/snpeff_variant_effects_region-pct-log.pdf", + 
"multiqc/multiqc_plots/pdf/snpeff_variant_effects_region-pct.pdf", + "multiqc/multiqc_plots/pdf/vep-general-stats.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/snpeff_effects-cnt.png", + "multiqc/multiqc_plots/png/snpeff_effects-pct.png", + "multiqc/multiqc_plots/png/snpeff_qualities.png", + "multiqc/multiqc_plots/png/snpeff_variant_effects_region-cnt.png", + "multiqc/multiqc_plots/png/snpeff_variant_effects_region-log.png", + "multiqc/multiqc_plots/png/snpeff_variant_effects_region-pct-log.png", + "multiqc/multiqc_plots/png/snpeff_variant_effects_region-pct.png", + "multiqc/multiqc_plots/png/vep-general-stats.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/snpeff_effects-cnt.svg", + "multiqc/multiqc_plots/svg/snpeff_effects-pct.svg", + "multiqc/multiqc_plots/svg/snpeff_qualities.svg", + "multiqc/multiqc_plots/svg/snpeff_variant_effects_region-cnt.svg", + "multiqc/multiqc_plots/svg/snpeff_variant_effects_region-log.svg", + "multiqc/multiqc_plots/svg/snpeff_variant_effects_region-pct-log.svg", + "multiqc/multiqc_plots/svg/snpeff_variant_effects_region-pct.svg", + "multiqc/multiqc_plots/svg/vep-general-stats.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reports", + "reports/EnsemblVEP", + "reports/EnsemblVEP/test", + "reports/EnsemblVEP/test/test_snpEff_VEP.ann.summary.html", + "reports/snpeff", + "reports/snpeff/test" + ], + [ + "multiqc_citations.txt:md5,ebf9f49bc020eeb38546ddab3a98171e", + "multiqc_snpeff.txt:md5,03a2b1c461cb6e5cccac64033a2f6526", + "snpeff_effects.txt:md5,3c5e9a1c191b77c781dc4d033b1dd1f7", + "snpeff_qualities.txt:md5,4c059b4e8bf0a64940ad1d6e30efd3a6", + "snpeff_variant_effects_region.txt:md5,05efd324edadced17ba3cd2b7714af57", + "vep-general-stats.txt:md5,57563be109a57f6edfa427b2b2c310ba", + "vep.txt:md5,bf54f689bb0ccab5e1566e48373f768c" + ], + "No BAM files", + "No CRAM files", + [ + 
"test_snpEff_VEP.ann.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-07T08:52:33.470414" + } +} diff --git a/tests/annotation_snpeff.nf.test b/tests/annotation_snpeff.nf.test new file mode 100644 index 0000000000..a557383a0f --- /dev/null +++ b/tests/annotation_snpeff.nf.test @@ -0,0 +1,29 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "Fails with profile test --tools snpeff --snpeff_db na --build_only_index", + params: [ + input: "${projectDir}/tests/csv/3.0/vcf_single.csv", + step: 'annotate', + snpeff_db: "na", + input: false, + build_only_index: true, + tools: 'snpeff' + ], + failure: true, + snapshot: 'stdout', + snapshot_include: 'annotation-cache' + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/annotation_snpeff.nf.test.snap b/tests/annotation_snpeff.nf.test.snap new file mode 100644 index 0000000000..f347aced61 --- /dev/null +++ b/tests/annotation_snpeff.nf.test.snap @@ -0,0 +1,23 @@ +{ + "Fails with profile test --tools snpeff --snpeff_db na --build_only_index": { + "content": [ + [ + "pipeline_info" + ], + "No stable content", + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings", + [ + "This path is not available within annotation-cache.", + "Please check https://annotation-cache.github.io/ to create a request for it." 
+ ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T19:13:50.126681673" + } +} \ No newline at end of file diff --git a/tests/annotation_snpsift.nf.test b/tests/annotation_snpsift.nf.test new file mode 100644 index 0000000000..1685711ed2 --- /dev/null +++ b/tests/annotation_snpsift.nf.test @@ -0,0 +1,34 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools snpsift", + params: [ + input: "${projectDir}/tests/csv/3.0/vcf_single.csv", + step: 'annotate', + tools: 'snpsift', + snpsift_databases: "${projectDir}/tests/config/snpsift_test_databases.csv" + ] + ], + [ + name: "-profile test --tools snpsift --save_reference", + params: [ + input: "${projectDir}/tests/csv/3.0/vcf_single.csv", + step: 'annotate', + tools: 'snpsift', + snpsift_databases: "${projectDir}/tests/config/snpsift_test_databases.csv", + save_reference: true + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/annotation_snpsift.nf.test.snap b/tests/annotation_snpsift.nf.test.snap new file mode 100644 index 0000000000..ce4b5b2528 --- /dev/null +++ b/tests/annotation_snpsift.nf.test.snap @@ -0,0 +1,103 @@ +{ + "-profile test --tools snpsift": { + "content": [ + 3, + { + "SNPSIFT_ANNMEM": { + "snpsift": "5.4a" + }, + "SNPSIFT_ANNMEMCREATE": { + "snpsift": "5.4a" + } + }, + [ + "annotation", + "annotation/test", + "annotation/test/test_snpSift.ann.vcf.gz", + "annotation/test/test_snpSift.ann.vcf.gz.tbi", + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + 
"multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/snpsift" + ], + [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" + ], + "No BAM files", + "No CRAM files", + [ + "test_snpSift.ann.vcf.gz:md5,5a8fa867f12f8f13038f81370d4cd00a" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-07T00:18:07.39152" + }, + "-profile test --tools snpsift --save_reference": { + "content": [ + 3, + { + "SNPSIFT_ANNMEM": { + "snpsift": "5.4a" + }, + "SNPSIFT_ANNMEMCREATE": { + "snpsift": "5.4a" + } + }, + [ + "annotation", + "annotation/test", + "annotation/test/test_snpSift.ann.vcf.gz", + "annotation/test/test_snpSift.ann.vcf.gz.tbi", + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/snpsift", + "reference/snpsift/test2.vcf.gz.snpsift.vardb", + "reference/snpsift/test2.vcf.gz.snpsift.vardb/MT192765.1.snpsift.df", + "reference/snpsift/test2.vcf.gz.snpsift.vardb/fields.snpsift.db_fields" + ], + [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "MT192765.1.snpsift.df:md5,87a35e6b78cb075081b87403df36f333", + "fields.snpsift.db_fields:md5,12dd1c56cca1cb9046c507172b0f9ae0" + ], + "No BAM files", + "No CRAM files", + [ + "test_snpSift.ann.vcf.gz:md5,5a8fa867f12f8f13038f81370d4cd00a" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-07T00:19:32.088401" + } +} \ No 
newline at end of file diff --git a/tests/annotation_vep.nf.test b/tests/annotation_vep.nf.test new file mode 100644 index 0000000000..ea3069a341 --- /dev/null +++ b/tests/annotation_vep.nf.test @@ -0,0 +1,42 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "Fails with profile test --tools vep --vep_cache_version 1 --build_only_index", + params: [ + input: "${projectDir}/tests/csv/3.0/vcf_single.csv", + step: 'annotate', + vep_cache_version: 1, + input: false, + build_only_index: true, + tools: 'vep' + ], + failure: true, + snapshot: 'stdout', + snapshot_include: 'annotation-cache' + ], + [ + name: "Fails with profile test --dbnsfp and no dbnsfp_tbi", + params: [ + input: "${projectDir}/tests/csv/3.0/vcf_single.csv", + step: 'annotate', + dbnsfp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + tools: 'vep' + ], + failure: true, + snapshot: 'stdout', + snapshot_include: 'dbnsfp inconsistency' + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/annotation_vep.nf.test.snap b/tests/annotation_vep.nf.test.snap new file mode 100644 index 0000000000..d28416f22e --- /dev/null +++ b/tests/annotation_vep.nf.test.snap @@ -0,0 +1,43 @@ +{ + "Fails with profile test --dbnsfp and no dbnsfp_tbi": { + "content": [ + [ + "pipeline_info" + ], + "No stable content", + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings", + [ + "Execution halted due to dbnsfp inconsistency." 
+ ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T19:15:20.088754202" + }, + "Fails with profile test --tools vep --vep_cache_version 1 --build_only_index": { + "content": [ + [ + "pipeline_info" + ], + "No stable content", + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings", + [ + "This path is not available within annotation-cache.", + "Please check https://annotation-cache.github.io/ to create a request for it." + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T19:14:33.737947216" + } +} \ No newline at end of file diff --git a/tests/bbsplit.nf.test b/tests/bbsplit.nf.test new file mode 100644 index 0000000000..f7713cc693 --- /dev/null +++ b/tests/bbsplit.nf.test @@ -0,0 +1,59 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline - BBSplit contamination removal" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools bbsplit,strelka --bbsplit_fasta_list", + params: [ + tools: 'bbsplit,strelka', + bbsplit_fasta_list: "${projectDir}/tests/csv/bbsplit_fasta_list.csv", + save_bbsplit_reads: true, + input: "${projectDir}/tests/csv/3.0/fastq_single.csv", + modules_testdata_base_path: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/', + igenomes_base: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/', + genome: 'testdata.nf-core.sarek', + save_reference: true + ], + ignoreFiles: '{multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt,multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt,multiqc/multiqc_data/mosdepth_perchrom.txt,multiqc/multiqc_data/samtools*.txt,preprocessing/**/*.cram,reports/mosdepth/*/*,**/info.txt,**/reflist.txt,**/scaffolds.txt.gz,**/summary.txt,**/build_index.log}' + ], + [ + name: "-profile test --tools bbsplit,strelka --bbsplit_fasta_list -stub", + params: [ + tools: 'bbsplit,strelka', + bbsplit_fasta_list: 
"${projectDir}/tests/csv/bbsplit_fasta_list.csv", + save_bbsplit_reads: true, + input: "${projectDir}/tests/csv/3.0/fastq_single.csv", + modules_testdata_base_path: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/', + igenomes_base: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/', + genome: 'testdata.nf-core.sarek' + ], + stub: true + ], + // Test BBSplit index building in PREPARE_GENOME (no prebuilt index) + // This tests the edge case where BBMAP_BBSPLIT is called with empty reads + // to build the index, which requires ext.prefix to handle null reads gracefully + [ + name: "-profile test --tools bbsplit --bbsplit_fasta_list (no prebuilt index)", + params: [ + tools: 'bbsplit', + bbsplit_fasta_list: "${projectDir}/tests/csv/bbsplit_fasta_list.csv", + input: "${projectDir}/tests/csv/3.0/fastq_single.csv", + fasta: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta', + igenomes_ignore: true, + genome: false, + skip_tools: 'baserecalibrator', + modules_testdata_base_path: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + ], + ignoreFiles: '{multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt,multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt,multiqc/multiqc_data/mosdepth_perchrom.txt,multiqc/multiqc_data/samtools*.txt,preprocessing/**/*.cram,reports/mosdepth/*/*}' + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/bbsplit.nf.test.snap b/tests/bbsplit.nf.test.snap new file mode 100644 index 0000000000..dd5505bf6e --- /dev/null +++ b/tests/bbsplit.nf.test.snap @@ -0,0 +1,814 @@ +{ + "-profile test --tools bbsplit,strelka --bbsplit_fasta_list": { + "content": [ + 26, + { + "BBMAP_BBSPLIT": { + "bbmap": "39.18" + }, + "BBMAP_INDEX": { + "bbmap": "39.18" + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "BWAMEM1_INDEX": { + 
"bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bbmap-bbsplit_plot.txt", + "multiqc/multiqc_data/bbmap.txt", + "multiqc/multiqc_data/bbsplit_stats_table.txt", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + 
"multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bbmap-bbsplit_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/bbmap-bbsplit_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/bbsplit_stats_table.pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + 
"multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bbmap-bbsplit_plot-cnt.png", + "multiqc/multiqc_plots/png/bbmap-bbsplit_plot-pct.png", + "multiqc/multiqc_plots/png/bbsplit_stats_table.png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + 
"multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bbmap-bbsplit_plot-cnt.svg", + "multiqc/multiqc_plots/svg/bbmap-bbsplit_plot-pct.svg", + "multiqc/multiqc_plots/svg/bbsplit_stats_table.svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + 
"multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/bbsplit", + "preprocessing/bbsplit/test-test_L1", + "preprocessing/bbsplit/test-test_L1/test-test_L1.stats.txt", + "preprocessing/bbsplit/test-test_L1/test-test_L1_chr22_1.fastq.gz", + "preprocessing/bbsplit/test-test_L1/test-test_L1_chr22_2.fastq.gz", + "preprocessing/bbsplit/test-test_L1/test-test_L1_primary_1.fastq.gz", + "preprocessing/bbsplit/test-test_L1/test-test_L1_primary_2.fastq.gz", + "preprocessing/bbsplit/test-test_L2", + "preprocessing/bbsplit/test-test_L2/test-test_L2.stats.txt", + 
"preprocessing/bbsplit/test-test_L2/test-test_L2_chr22_1.fastq.gz", + "preprocessing/bbsplit/test-test_L2/test-test_L2_chr22_2.fastq.gz", + "preprocessing/bbsplit/test-test_L2/test-test_L2_primary_1.fastq.gz", + "preprocessing/bbsplit/test-test_L2/test-test_L2_primary_2.fastq.gz", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reference/bbsplit_index", + "reference/bbsplit_index/ref", + "reference/bbsplit_index/ref/genome", + "reference/bbsplit_index/ref/genome/1", + "reference/bbsplit_index/ref/genome/1/chr1.chrom.gz", + "reference/bbsplit_index/ref/genome/1/info.txt", + "reference/bbsplit_index/ref/genome/1/merged_ref_9222711925177839698.fa.gz", + "reference/bbsplit_index/ref/genome/1/namelist.txt", + "reference/bbsplit_index/ref/genome/1/reflist.txt", + "reference/bbsplit_index/ref/genome/1/scaffolds.txt.gz", + "reference/bbsplit_index/ref/genome/1/summary.txt", + "reference/bbsplit_index/ref/index", + "reference/bbsplit_index/ref/index/1", + "reference/bbsplit_index/ref/index/1/chr1_index_k13_c13_b1.block", + "reference/bbsplit_index/ref/index/1/chr1_index_k13_c13_b1.block2.gz", + "reference/build_index.log", + "reference/bwa", + "reference/bwa/genome.amb", + "reference/bwa/genome.ann", + "reference/bwa/genome.bwt", + "reference/bwa/genome.pac", + "reference/bwa/genome.sa", + "reference/intervals", + "reference/intervals/chr22_1-40001.bed", + "reference/intervals/chr22_1-40001.bed.gz", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz", + "reports", + "reports/bcftools", + 
"reports/bcftools/strelka", + "reports/bcftools/strelka/test", + "reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test-test_L2", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.zip", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/test", + "reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.count", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/test", + 
"variant_calling/strelka/test/test.strelka.genome.vcf.gz", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi" + ], + [ + "fastqc-status-check-heatmap.txt:md5,a020b9689ddeb4abec16b4854fe452f1", + "fastqc_adapter_content_plot.txt:md5,2e1b72be741319e7fadbbb39d7e5b37d", + "fastqc_per_base_n_content_plot.txt:md5,ad3b971a6bb4e8ba6c844c8a03584eb8", + "fastqc_per_base_sequence_quality_plot.txt:md5,1bc03889d243a944253ac637d81ae10c", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,2c42d140ce06c08dad2b58f397c23239", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,59e22821d350bfb97c37ffd9088f5ad9", + "fastqc_per_sequence_quality_scores_plot.txt:md5,f33615cc98bb6225f39545a415fa7c0f", + "fastqc_sequence_counts_plot.txt:md5,7f0f19a58e8e54e792a751fd04a9ae13", + "fastqc_sequence_duplication_levels_plot.txt:md5,92b02e250ff78725deb9a10d510fcecc", + "fastqc_sequence_length_distribution_plot.txt:md5,fb04dce68ec566314125bc9438211b28", + "multiqc_citations.txt:md5,790a7694da30294b51fa1d6c3eb8ba2b", + "multiqc_fastqc.txt:md5,bde0d0bffa62228b33fb68b7e25b6ff8", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chr1.chrom.gz:md5,55963ae4b37886f20734ac01d6041dc8", + "merged_ref_9222711925177839698.fa.gz:md5,619298e6deae7f8cc3ed3b649ab4f875", + "namelist.txt:md5,c26fae3e77a7b86aded8dbb20e387f38", + "chr1_index_k13_c13_b1.block:md5,3f2154585dddcd097072fbb3a618bace", + "chr1_index_k13_c13_b1.block2.gz:md5,cbc6dd52d518bd3240d7f7953e9bba47", + "genome.amb:md5,1891c1de381b3a96d4e72f590fde20c1", + "genome.ann:md5,2df4aa2d7580639fa0fcdbcad5e2e969", + 
"genome.bwt:md5,815eded87e4cb6b0f1daab5c4d6e30af", + "genome.pac:md5,8569fbdb2c98c6fb16dfa73d8eacb070", + "genome.sa:md5,e7cff62b919448a3a3d0fe4aaf427594", + "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "test.strelka.variants.bcftools_stats.txt:md5,b6511034fcbf5c396ed035207b4aea41", + "test.strelka.variants.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "test.strelka.variants.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a" + ], + "No BAM files", + "No CRAM files", + [ + "test.strelka.genome.vcf.gz:md5,2396656fa8d478e5fd76cda3a1c91cbb", + "test.strelka.variants.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "timestamp": "2026-02-06T14:19:56.828989311", + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + } + }, + "-profile test --tools bbsplit,strelka --bbsplit_fasta_list -stub": { + "content": [ + 26, + { + "BBMAP_BBSPLIT": { + "bbmap": "39.18" + }, + "BBMAP_INDEX": { + "bbmap": "39.18" + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + 
"TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/.stub", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/bbsplit", + "preprocessing/bbsplit/test-test_L1", + "preprocessing/bbsplit/test-test_L1/test-test_L1.stats.txt", + "preprocessing/bbsplit/test-test_L1/test-test_L1_primary.fastq.gz", + "preprocessing/bbsplit/test-test_L2", + "preprocessing/bbsplit/test-test_L2/test-test_L2.stats.txt", + "preprocessing/bbsplit/test-test_L2/test-test_L2_primary.fastq.gz", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.cram", + "preprocessing/markduplicates/test/test.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/strelka", + "reports/bcftools/strelka/test", + "reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1.html", + "reports/fastqc/test-test_L1/test-test_L1.zip", + "reports/fastqc/test-test_L2", + "reports/fastqc/test-test_L2/test-test_L2.html", + "reports/fastqc/test-test_L2/test-test_L2.zip", + "reports/markduplicates", + "reports/markduplicates/test", + 
"reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.global.dist.txt", + "reports/mosdepth/test/test.md.per-base.bed.gz", + "reports/mosdepth/test/test.md.per-base.bed.gz.csi", + "reports/mosdepth/test/test.md.per-base.d4", + "reports/mosdepth/test/test.md.quantized.bed.gz", + "reports/mosdepth/test/test.md.quantized.bed.gz.csi", + "reports/mosdepth/test/test.md.region.dist.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.md.summary.txt", + "reports/mosdepth/test/test.md.thresholds.bed.gz", + "reports/mosdepth/test/test.md.thresholds.bed.gz.csi", + "reports/mosdepth/test/test.recal.global.dist.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + "reports/mosdepth/test/test.recal.per-base.d4", + "reports/mosdepth/test/test.recal.quantized.bed.gz", + "reports/mosdepth/test/test.recal.quantized.bed.gz.csi", + "reports/mosdepth/test/test.recal.region.dist.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.summary.txt", + "reports/mosdepth/test/test.recal.thresholds.bed.gz", + "reports/mosdepth/test/test.recal.thresholds.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/test", + "reports/vcftools/strelka/test/test.strelka.variants.012", + "reports/vcftools/strelka/test/test.strelka.variants.012.indv", + "reports/vcftools/strelka/test/test.strelka.variants.012.pos", + "reports/vcftools/strelka/test/test.strelka.variants.BEAGLE.GL", + "reports/vcftools/strelka/test/test.strelka.variants.BEAGLE.PL", + 
"reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/test/test.strelka.variants.FORMAT", + "reports/vcftools/strelka/test/test.strelka.variants.INFO", + "reports/vcftools/strelka/test/test.strelka.variants.LROH", + "reports/vcftools/strelka/test/test.strelka.variants.Tajima.D", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.count", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.summary", + "reports/vcftools/strelka/test/test.strelka.variants.bcf", + "reports/vcftools/strelka/test/test.strelka.variants.diff.discordance.matrix", + "reports/vcftools/strelka/test/test.strelka.variants.diff.indv", + "reports/vcftools/strelka/test/test.strelka.variants.diff.indv_in_files", + "reports/vcftools/strelka/test/test.strelka.variants.diff.sites", + "reports/vcftools/strelka/test/test.strelka.variants.diff.sites_in_files", + "reports/vcftools/strelka/test/test.strelka.variants.diff.switch", + "reports/vcftools/strelka/test/test.strelka.variants.frq", + "reports/vcftools/strelka/test/test.strelka.variants.frq.count", + "reports/vcftools/strelka/test/test.strelka.variants.gdepth", + "reports/vcftools/strelka/test/test.strelka.variants.geno.chisq", + "reports/vcftools/strelka/test/test.strelka.variants.geno.ld", + "reports/vcftools/strelka/test/test.strelka.variants.hap.ld", + "reports/vcftools/strelka/test/test.strelka.variants.hapcount", + "reports/vcftools/strelka/test/test.strelka.variants.het", + "reports/vcftools/strelka/test/test.strelka.variants.hwe", + "reports/vcftools/strelka/test/test.strelka.variants.idepth", + "reports/vcftools/strelka/test/test.strelka.variants.ifreqburden", + "reports/vcftools/strelka/test/test.strelka.variants.imiss", + "reports/vcftools/strelka/test/test.strelka.variants.impute.hap", + 
"reports/vcftools/strelka/test/test.strelka.variants.impute.hap.indv", + "reports/vcftools/strelka/test/test.strelka.variants.impute.hap.legend", + "reports/vcftools/strelka/test/test.strelka.variants.indel.hist", + "reports/vcftools/strelka/test/test.strelka.variants.interchrom.geno.ld", + "reports/vcftools/strelka/test/test.strelka.variants.interchrom.hap.ld", + "reports/vcftools/strelka/test/test.strelka.variants.kept.sites", + "reports/vcftools/strelka/test/test.strelka.variants.ldepth", + "reports/vcftools/strelka/test/test.strelka.variants.ldepth.mean", + "reports/vcftools/strelka/test/test.strelka.variants.ldhat.locs", + "reports/vcftools/strelka/test/test.strelka.variants.ldhat.sites", + "reports/vcftools/strelka/test/test.strelka.variants.list.geno.ld", + "reports/vcftools/strelka/test/test.strelka.variants.list.hap.ld", + "reports/vcftools/strelka/test/test.strelka.variants.lmiss", + "reports/vcftools/strelka/test/test.strelka.variants.lqual", + "reports/vcftools/strelka/test/test.strelka.variants.map", + "reports/vcftools/strelka/test/test.strelka.variants.mendel", + "reports/vcftools/strelka/test/test.strelka.variants.ped", + "reports/vcftools/strelka/test/test.strelka.variants.relatedness", + "reports/vcftools/strelka/test/test.strelka.variants.relatedness2", + "reports/vcftools/strelka/test/test.strelka.variants.removed.sites", + "reports/vcftools/strelka/test/test.strelka.variants.singletons", + "reports/vcftools/strelka/test/test.strelka.variants.sites.pi", + "reports/vcftools/strelka/test/test.strelka.variants.snpden", + "reports/vcftools/strelka/test/test.strelka.variants.tfam", + "reports/vcftools/strelka/test/test.strelka.variants.tped", + "reports/vcftools/strelka/test/test.strelka.variants.vcf", + "reports/vcftools/strelka/test/test.strelka.variants.weir.fst", + "reports/vcftools/strelka/test/test.strelka.variants.windowed.pi", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/test", + 
"variant_calling/strelka/test/test.strelka.genome.vcf.gz", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1", + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." + ] + ], + "timestamp": "2026-04-08T12:05:44.94807545", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.2" + } + }, + "-profile test --tools bbsplit --bbsplit_fasta_list (no prebuilt index)": { + "content": [ + 18, + { + "BBMAP_BBSPLIT": { + "bbmap": "39.18" + }, + "BBMAP_INDEX": { + "bbmap": "39.18" + }, + "BUILD_INTERVALS": { + "gawk": "5.3.0" + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.21 + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates_no_table.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bbmap-bbsplit_plot.txt", + "multiqc/multiqc_data/bbmap.txt", + "multiqc/multiqc_data/bbsplit_stats_table.txt", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + 
"multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bbmap-bbsplit_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/bbmap-bbsplit_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/bbsplit_stats_table.pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + 
"multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bbmap-bbsplit_plot-cnt.png", + "multiqc/multiqc_plots/png/bbmap-bbsplit_plot-pct.png", + "multiqc/multiqc_plots/png/bbsplit_stats_table.png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + 
"multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bbmap-bbsplit_plot-cnt.svg", + "multiqc/multiqc_plots/svg/bbmap-bbsplit_plot-pct.svg", + "multiqc/multiqc_plots/svg/bbsplit_stats_table.svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + 
"multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/bbsplit", + "preprocessing/bbsplit/test-test_L1", + "preprocessing/bbsplit/test-test_L1/test-test_L1.stats.txt", + "preprocessing/bbsplit/test-test_L2", + "preprocessing/bbsplit/test-test_L2/test-test_L2.stats.txt", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "reference", + "reference/dict", + "reference/fai", + "reports", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test-test_L2", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.zip", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + 
"reports/samtools/test/test.md.cram.stats" + ], + [ + "fastqc-status-check-heatmap.txt:md5,a020b9689ddeb4abec16b4854fe452f1", + "fastqc_adapter_content_plot.txt:md5,2e1b72be741319e7fadbbb39d7e5b37d", + "fastqc_per_base_n_content_plot.txt:md5,ad3b971a6bb4e8ba6c844c8a03584eb8", + "fastqc_per_base_sequence_quality_plot.txt:md5,1bc03889d243a944253ac637d81ae10c", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,2c42d140ce06c08dad2b58f397c23239", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,59e22821d350bfb97c37ffd9088f5ad9", + "fastqc_per_sequence_quality_scores_plot.txt:md5,f33615cc98bb6225f39545a415fa7c0f", + "fastqc_sequence_counts_plot.txt:md5,7f0f19a58e8e54e792a751fd04a9ae13", + "fastqc_sequence_duplication_levels_plot.txt:md5,92b02e250ff78725deb9a10d510fcecc", + "fastqc_sequence_length_distribution_plot.txt:md5,fb04dce68ec566314125bc9438211b28", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "multiqc_fastqc.txt:md5,bde0d0bffa62228b33fb68b7e25b6ff8", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No BAM files", + "No CRAM files", + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "timestamp": "2026-02-06T14:28:35.628964819", + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + } + } +} \ No newline at end of file diff --git a/tests/config/bcfann_test_columns.txt b/tests/config/bcfann_test_columns.txt new file mode 100644 index 0000000000..bfbbff971e --- /dev/null +++ b/tests/config/bcfann_test_columns.txt @@ -0,0 +1,3 @@ +INFO/ICB +INFO/HOB +INFO/DP4 diff --git 
a/tests/config/bcfann_test_header.txt b/tests/config/bcfann_test_header.txt new file mode 100644 index 0000000000..443dd3ea4a --- /dev/null +++ b/tests/config/bcfann_test_header.txt @@ -0,0 +1 @@ +##INFO= diff --git a/tests/config/nextflow.config b/tests/config/nextflow.config deleted file mode 100644 index 74e1c2d513..0000000000 --- a/tests/config/nextflow.config +++ /dev/null @@ -1,35 +0,0 @@ -params { - outdir = "output/" - publish_dir_mode = "copy" - enable_conda = false - singularity_pull_docker_container = false - max_cpus = 2 - max_memory = 6.GB - max_time = 6.h -} - -process { - cpus = 2 - memory = 6.GB - time = 48.h -} - -if ("$PROFILE" == "singularity") { - singularity.enabled = true - singularity.autoMounts = true -} else if ("$PROFILE" == "conda") { - params.enable_conda = true -} else { - docker.enabled = true - docker.runOptions = '-u \$(id -u):\$(id -g)' -} - -// Load test_data.config containing paths to test data -includeConfig 'test_data.config' - -// Load modules.config for default module params -includeConfig '../../conf/modules.config' - -manifest { - nextflowVersion = '!>=21.10.3' -} diff --git a/tests/config/pytest_software.yml b/tests/config/pytest_software.yml deleted file mode 100644 index b215cffcc6..0000000000 --- a/tests/config/pytest_software.yml +++ /dev/null @@ -1,4 +0,0 @@ -markduplicates: - - modules/nf-core/software/gatk4/markduplicates/main.nf - - subworkflow/local/markduplicates.nf - - tests/subworkflow/local/markduplicates/** diff --git a/tests/config/snpsift_test_databases.csv b/tests/config/snpsift_test_databases.csv new file mode 100644 index 0000000000..2c13c9a98f --- /dev/null +++ b/tests/config/snpsift_test_databases.csv @@ -0,0 +1,2 @@ +vcf,tbi,fields,prefix,vardb +https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/vcf/test2.vcf.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi,DP;VDB;MQB;BQB,TEST_, diff 
--git a/tests/config/test_data.config b/tests/config/test_data.config deleted file mode 100644 index 665947a594..0000000000 --- a/tests/config/test_data.config +++ /dev/null @@ -1,15 +0,0 @@ -def test_data_dir = "${launchDir}/tests/data/" -def nf_core_modules_data = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" - -try { - includeConfig "https://raw.githubusercontent.com/nf-core/modules/master/tests/config/test_data.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/modules test data config") -} - -params { - test_data { - 'external' { - } - } -} diff --git a/tests/csv/3.0/ascat_somatic.csv b/tests/csv/3.0/ascat_somatic.csv new file mode 100644 index 0000000000..22b50bf1c6 --- /dev/null +++ b/tests/csv/3.0/ascat_somatic.csv @@ -0,0 +1,3 @@ +patient,sex,status,sample,bam,bai +NA12878,XX,0,NA12878,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/NA12878.chr22.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/NA12878.chr22.bam.bai +NA12878,XX,1,NA12878_1X,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/NA12878.chr22.1X.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/NA12878.chr22.1X.bam.bai diff --git a/tests/csv/3.0/bam_and_fastq_and_spring.csv b/tests/csv/3.0/bam_and_fastq_and_spring.csv new file mode 100644 index 0000000000..b81b88edbc --- /dev/null +++ b/tests/csv/3.0/bam_and_fastq_and_spring.csv @@ -0,0 +1,5 @@ +patient,sex,status,sample,lane,fastq_1,fastq_2,spring_1,spring_2,bam,bai +test,XX,0,test,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz,,,, 
+test2,XX,0,test2,test2_L1,,,https://raw.githubusercontent.com/nf-core/test-datasets/sarek3/data/genomics/homo_sapiens/illumina/spring/test_1.fastq.gz.spring,https://raw.githubusercontent.com/nf-core/test-datasets/sarek3/data/genomics/homo_sapiens/illumina/spring/test_2.fastq.gz.spring,, +test3,XX,0,test3,test3_L1,,,https://raw.githubusercontent.com/nf-core/test-datasets/sarek3/data/genomics/homo_sapiens/illumina/spring/test_R1_R2.fastq.gz.spring,,, +test_bam,XX,0,test_bam,test_bam_L1,,,,,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai diff --git a/tests/csv/3.0/bam_for_remapping.csv b/tests/csv/3.0/bam_for_remapping.csv new file mode 100644 index 0000000000..a37e07bb83 --- /dev/null +++ b/tests/csv/3.0/bam_for_remapping.csv @@ -0,0 +1,2 @@ +patient,sex,status,sample,lane,bam,bai +test,XX,0,test,1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai diff --git a/tests/csv/3.0/bam_tumoronly_msisensor2.csv b/tests/csv/3.0/bam_tumoronly_msisensor2.csv new file mode 100644 index 0000000000..4e2adf15e8 --- /dev/null +++ b/tests/csv/3.0/bam_tumoronly_msisensor2.csv @@ -0,0 +1,2 @@ +patient,sex,status,sample,lane,bam,bai +test,XX,1,test,1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test_msisensor2.hg19.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test_msisensor2.hg19.bam.bai diff --git a/tests/csv/3.0/bam_umi_header.csv b/tests/csv/3.0/bam_umi_header.csv new file mode 100644 index 0000000000..10c06bc37d --- /dev/null +++ 
b/tests/csv/3.0/bam_umi_header.csv @@ -0,0 +1,2 @@ +patient,status,lane,sample,bam,bai +test,0,1,test,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_in_header.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/umi/test.paired_end.umi_in_header.bam.bai diff --git a/tests/csv/3.0/fastq_multi_lane.csv b/tests/csv/3.0/fastq_multi_lane.csv new file mode 100644 index 0000000000..4cc7dc63c6 --- /dev/null +++ b/tests/csv/3.0/fastq_multi_lane.csv @@ -0,0 +1,4 @@ +patient,sex,status,sample,lane,fastq_1,fastq_2 +test,XX,0,test,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz +test,XX,0,test,test_L2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz +test,XX,1,test2,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz diff --git a/tests/csv/3.0/fastq_multiple_lane_ids.csv b/tests/csv/3.0/fastq_multiple_lane_ids.csv new file mode 100644 index 0000000000..82d9f06107 --- /dev/null +++ b/tests/csv/3.0/fastq_multiple_lane_ids.csv @@ -0,0 +1,5 @@ +patient,sex,status,sample,lane,fastq_1,fastq_2 +test,XX,0,test,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz 
+test,XX,0,test,test_L2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz +test2,XX,0,test2,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz +test2,XX,0,test2,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz diff --git a/tests/csv/3.0/fastq_multiple_sample_ids.csv b/tests/csv/3.0/fastq_multiple_sample_ids.csv new file mode 100644 index 0000000000..9ac1cbe905 --- /dev/null +++ b/tests/csv/3.0/fastq_multiple_sample_ids.csv @@ -0,0 +1,5 @@ +patient,sex,status,sample,lane,fastq_1,fastq_2 +test,XX,0,test,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz +test,XX,0,test,test_L2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz +test2,XX,0,test,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz 
+test2,XX,0,test,test_L2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz diff --git a/tests/csv/3.0/fastq_pair.csv b/tests/csv/3.0/fastq_pair.csv index ce9e72fd97..2986ebfd1b 100644 --- a/tests/csv/3.0/fastq_pair.csv +++ b/tests/csv/3.0/fastq_pair.csv @@ -1,3 +1,3 @@ -patient,gender,status,sample,lane,fastq_1,fastq_2 +patient,sex,status,sample,lane,fastq_1,fastq_2 test,XX,0,test,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz -test,XX,1,test2,test2_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz +test,XX,1,test2,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz diff --git a/tests/csv/3.0/fastq_sample_with_space.csv b/tests/csv/3.0/fastq_sample_with_space.csv new file mode 100644 index 0000000000..24e917ce44 --- /dev/null +++ b/tests/csv/3.0/fastq_sample_with_space.csv @@ -0,0 +1,3 @@ +patient,sex,status,sample,lane,fastq_1,fastq_2 +test,XX,0,test,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz +test,XX,1,test 
2,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz diff --git a/tests/csv/3.0/fastq_single.csv b/tests/csv/3.0/fastq_single.csv index 7e84c751df..c89bab1bec 100644 --- a/tests/csv/3.0/fastq_single.csv +++ b/tests/csv/3.0/fastq_single.csv @@ -1,2 +1,3 @@ -patient,gender,status,sample,lane,fastq_1,fastq_2 +patient,sex,status,sample,lane,fastq_1,fastq_2 test,XX,0,test,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz +test,XX,0,test,test_L2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz diff --git a/tests/csv/3.0/fastq_single_integer_lane.csv b/tests/csv/3.0/fastq_single_integer_lane.csv new file mode 100644 index 0000000000..edafeb45e3 --- /dev/null +++ b/tests/csv/3.0/fastq_single_integer_lane.csv @@ -0,0 +1,3 @@ +patient,sex,status,sample,lane,fastq_1,fastq_2 +test,XX,0,test,0,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz +test,XX,0,test,1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz diff --git a/tests/csv/3.0/fastq_triple_two_tumor.csv b/tests/csv/3.0/fastq_triple_two_tumor.csv new 
file mode 100644 index 0000000000..78233c1b29 --- /dev/null +++ b/tests/csv/3.0/fastq_triple_two_tumor.csv @@ -0,0 +1,4 @@ +patient,sex,status,sample,lane,fastq_1,fastq_2 +test,XX,0,test,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz +test,XX,1,test2,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz +test,XX,1,test3,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test.umi_2.fastq.gz diff --git a/tests/csv/3.0/fastq_tumor_only.csv b/tests/csv/3.0/fastq_tumor_only.csv new file mode 100644 index 0000000000..ea75421038 --- /dev/null +++ b/tests/csv/3.0/fastq_tumor_only.csv @@ -0,0 +1,2 @@ +patient,sex,status,sample,lane,fastq_1,fastq_2 +test,XX,1,test2,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz diff --git a/tests/csv/3.0/fastq_umi.csv b/tests/csv/3.0/fastq_umi.csv index e44afb0f14..19001f0e84 100644 --- a/tests/csv/3.0/fastq_umi.csv +++ b/tests/csv/3.0/fastq_umi.csv @@ -1,2 +1,2 @@ -patient,gender,status,sample,lane,fastq_1,fastq_2 
-test,XX,0,test,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test.umi_2.fastq.gz +patient,sex,status,sample,lane,fastq_1,fastq_2 +test,XX,0,test,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_umi_cleanname_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_umi_cleanname_2.fastq.gz diff --git a/tests/csv/3.0/fastq_umi_multi_lane.csv b/tests/csv/3.0/fastq_umi_multi_lane.csv new file mode 100644 index 0000000000..7fe0fbc8d7 --- /dev/null +++ b/tests/csv/3.0/fastq_umi_multi_lane.csv @@ -0,0 +1,3 @@ +patient,sex,status,sample,lane,fastq_1,fastq_2 +test,XX,0,test,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_umi_cleanname_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_umi_cleanname_2.fastq.gz +test,XX,0,test,test_L2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2.umi_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2.umi_2.fastq.gz diff --git a/tests/csv/3.0/mapped_joint_bam.csv b/tests/csv/3.0/mapped_joint_bam.csv new file mode 100644 index 0000000000..1dc3920b1e --- /dev/null +++ b/tests/csv/3.0/mapped_joint_bam.csv @@ -0,0 +1,3 @@ +patient,status,sample,bam,bai +testN,0,testN,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai 
+testT,0,testT,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai diff --git a/tests/csv/3.0/mapped_single.csv b/tests/csv/3.0/mapped_single_bam.csv similarity index 52% rename from tests/csv/3.0/mapped_single.csv rename to tests/csv/3.0/mapped_single_bam.csv index 8b074cd32a..8cbe6f9ce2 100644 --- a/tests/csv/3.0/mapped_single.csv +++ b/tests/csv/3.0/mapped_single_bam.csv @@ -1,3 +1,2 @@ patient,status,sample,bam,bai test,0,test,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai -test1,0,test1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai diff --git a/tests/csv/3.0/mapped_single_cram.csv b/tests/csv/3.0/mapped_single_cram.csv new file mode 100644 index 0000000000..1baa471c41 --- /dev/null +++ b/tests/csv/3.0/mapped_single_cram.csv @@ -0,0 +1,2 @@ +patient,status,sample,cram,crai +test,0,test,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai diff --git a/tests/csv/3.0/prepare_recalibration_single_bam.csv b/tests/csv/3.0/prepare_recalibration_single_bam.csv new file mode 100644 index 0000000000..a61c3f8222 --- /dev/null +++ b/tests/csv/3.0/prepare_recalibration_single_bam.csv @@ -0,0 +1,2 @@ 
+patient,status,sample,bam,bai,table +test,0,test,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table diff --git a/tests/csv/3.0/prepare_recalibration_single_cram.csv b/tests/csv/3.0/prepare_recalibration_single_cram.csv new file mode 100644 index 0000000000..4adc8fa105 --- /dev/null +++ b/tests/csv/3.0/prepare_recalibration_single_cram.csv @@ -0,0 +1,2 @@ +patient,status,sample,cram,crai,table +test,0,test,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table diff --git a/tests/csv/3.0/recalibrated.csv b/tests/csv/3.0/recalibrated.csv index 407a3b4cb5..fbaba2c90d 100644 --- a/tests/csv/3.0/recalibrated.csv +++ b/tests/csv/3.0/recalibrated.csv @@ -1,6 +1,5 @@ -patient,gender,status,sample,cram,crai +patient,sex,status,sample,cram,crai test,XX,0,sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai 
-test2,XX,0,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai -test2,XX,1,sample3,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai +test1,XX,1,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai +test3,XX,0,sample3,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai test3,XX,1,sample4,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai -test3,XX,1,sample5,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai diff --git a/tests/csv/3.0/recalibrated_germline.csv 
b/tests/csv/3.0/recalibrated_germline.csv new file mode 100644 index 0000000000..a7875203a6 --- /dev/null +++ b/tests/csv/3.0/recalibrated_germline.csv @@ -0,0 +1,2 @@ +patient,sex,status,sample,cram,crai +test,XX,0,sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai diff --git a/tests/csv/3.0/recalibrated_somatic.csv b/tests/csv/3.0/recalibrated_somatic.csv new file mode 100644 index 0000000000..e7095a2199 --- /dev/null +++ b/tests/csv/3.0/recalibrated_somatic.csv @@ -0,0 +1,3 @@ +patient,sex,status,sample,cram,crai,contamination +test3,XX,0,sample3,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai,0 +test3,XX,1,sample4,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai,0.2 diff --git a/tests/csv/3.0/recalibrated_somatic_joint.csv b/tests/csv/3.0/recalibrated_somatic_joint.csv new file mode 100644 index 0000000000..12eb61d860 --- /dev/null +++ b/tests/csv/3.0/recalibrated_somatic_joint.csv @@ -0,0 +1,4 @@ +patient,sex,status,sample,cram,crai 
+test,XX,0,sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai +test,XX,1,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai +test,XX,1,sample3,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram.crai diff --git a/tests/csv/3.0/recalibrated_somatic_two_normal_one_sample.csv b/tests/csv/3.0/recalibrated_somatic_two_normal_one_sample.csv new file mode 100644 index 0000000000..374790ffad --- /dev/null +++ b/tests/csv/3.0/recalibrated_somatic_two_normal_one_sample.csv @@ -0,0 +1,4 @@ +patient,sex,status,sample,cram,crai +test,XX,0,sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai +test,XX,0,sample1B,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai 
+test,XX,1,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai diff --git a/tests/csv/3.0/recalibrated_tumoronly.csv b/tests/csv/3.0/recalibrated_tumoronly.csv new file mode 100644 index 0000000000..8f766d9e5a --- /dev/null +++ b/tests/csv/3.0/recalibrated_tumoronly.csv @@ -0,0 +1,2 @@ +patient,sex,status,sample,cram,crai,contamination +test1,XX,1,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai,0.2 diff --git a/tests/csv/3.0/recalibrated_tumoronly_joint.csv b/tests/csv/3.0/recalibrated_tumoronly_joint.csv new file mode 100644 index 0000000000..f3ded832ec --- /dev/null +++ b/tests/csv/3.0/recalibrated_tumoronly_joint.csv @@ -0,0 +1,3 @@ +patient,sex,status,sample,cram,crai +test,XX,1,sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai +test,XX,1,sample3,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test3.paired_end.recalibrated.sorted.cram.crai diff --git a/tests/csv/3.0/vcf_single.csv b/tests/csv/3.0/vcf_single.csv new file mode 100644 index 0000000000..601e72f60f --- /dev/null +++ 
b/tests/csv/3.0/vcf_single.csv @@ -0,0 +1,2 @@ +patient,sample,vcf +test,test,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/vcf/test.vcf.gz diff --git a/tests/csv/bbsplit_fasta_list.csv b/tests/csv/bbsplit_fasta_list.csv new file mode 100644 index 0000000000..2b1c0eb3f3 --- /dev/null +++ b/tests/csv/bbsplit_fasta_list.csv @@ -0,0 +1 @@ +chr22,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr22/sequence/chr22_23800000-23980000.fa diff --git a/tests/default.nf.test b/tests/default.nf.test new file mode 100644 index 0000000000..6f18ccfcfd --- /dev/null +++ b/tests/default.nf.test @@ -0,0 +1,23 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test", + params: [], + snapshot: 'stderr,stdout', + ], + [ + name: "-profile test -stub", + params: [], + stub: true, + ], + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap new file mode 100644 index 0000000000..da3eeb6d16 --- /dev/null +++ b/tests/default.nf.test.snap @@ -0,0 +1,612 @@ +{ + "-profile test": { + "content": [ + 23, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + 
"TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + 
"multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + 
"multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + 
"preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/strelka", + "reports/bcftools/strelka/test", + "reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test-test_L2", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.zip", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + 
"reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/test", + "reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.count", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/test", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi" + ], + [ + "fastqc-status-check-heatmap.txt:md5,a020b9689ddeb4abec16b4854fe452f1", + "fastqc_adapter_content_plot.txt:md5,2e1b72be741319e7fadbbb39d7e5b37d", + "fastqc_per_base_n_content_plot.txt:md5,ad3b971a6bb4e8ba6c844c8a03584eb8", + "fastqc_per_base_sequence_quality_plot.txt:md5,1bc03889d243a944253ac637d81ae10c", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,2c42d140ce06c08dad2b58f397c23239", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,59e22821d350bfb97c37ffd9088f5ad9", + "fastqc_per_sequence_quality_scores_plot.txt:md5,f33615cc98bb6225f39545a415fa7c0f", + "fastqc_sequence_counts_plot.txt:md5,7f0f19a58e8e54e792a751fd04a9ae13", + "fastqc_sequence_duplication_levels_plot.txt:md5,92b02e250ff78725deb9a10d510fcecc", + "fastqc_sequence_length_distribution_plot.txt:md5,fb04dce68ec566314125bc9438211b28", + "mosdepth-coverage-per-contig-single.txt:md5,a0990e98cdd2cd5540d07f504f516ccf", + "mosdepth-cumcoverage-dist-id.txt:md5,caee7b9e5d1a451970f87d791c3e450b", + "mosdepth_perchrom.txt:md5,a0990e98cdd2cd5540d07f504f516ccf", + "multiqc_citations.txt:md5,790a7694da30294b51fa1d6c3eb8ba2b", + "multiqc_fastqc.txt:md5,bde0d0bffa62228b33fb68b7e25b6ff8", + 
"picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,ae7954084b2cd708c5e7369606bf4208", + "samtools_alignment_plot.txt:md5,438e719bf574a46726dbd2e0f1442e42", + "test.strelka.variants.bcftools_stats.txt:md5,2613827870dd789fe602a8a3b739b7f2", + "test.md.mosdepth.global.dist.txt:md5,ef7c375ae07aec5540f9892b9b556b73", + "test.md.mosdepth.region.dist.txt:md5,212efff2213f6fc1c3204daf68bbb8c8", + "test.md.mosdepth.summary.txt:md5,72114393647ff64503522760218b30f0", + "test.md.regions.bed.gz:md5,985db429051ddcd5eae177da6fb55ad6", + "test.md.regions.bed.gz.csi:md5,3fa0f8272fefafe3cd840376d34a94a2", + "test.recal.mosdepth.global.dist.txt:md5,ef7c375ae07aec5540f9892b9b556b73", + "test.recal.mosdepth.region.dist.txt:md5,212efff2213f6fc1c3204daf68bbb8c8", + "test.recal.mosdepth.summary.txt:md5,72114393647ff64503522760218b30f0", + "test.recal.regions.bed.gz:md5,985db429051ddcd5eae177da6fb55ad6", + "test.recal.regions.bed.gz.csi:md5,3fa0f8272fefafe3cd840376d34a94a2", + "test.strelka.variants.FILTER.summary:md5,dd87f507da7de20d5318841af312493b", + "test.strelka.variants.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a" + ], + "No BAM files", + [ + "test.md.cram:md5,724c601c9daf019d356a53a7d5e1c8b1", + "test.recal.cram:md5,dbd6f40b1e6d72501dc034e62e9d54eb" + ], + [ + "test.strelka.genome.vcf.gz:md5,16437a040679d88b7d84a9276f793d6c", + "test.strelka.variants.vcf.gz:md5,666f835fdaf4952a179cdedd40c9d565" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ], + [ + "N E X T F L O W ~ version [VERSION]", + "Launching `[PATH]/tests/../main.nf` [RUN_NAME] DSL2 - 
revision: [REVISION]", + "------------------------------------------------------", + " ,--./,-.", + " ___ __ __ __ ___ /,-._.--~'", + " |\\ | |__ __ / ` / \\ |__) |__ } {", + " | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-,", + " `._,._,'", + " ____", + " .\u00b4 _ `.", + " / |\\`-_ \\ __ __ ___ ", + " | | \\ `-| |__` /\\ |__) |__ |__/", + " \\ | \\ / .__| /\u00af\u00af\\ | \\ |___ | \\", + " `|____\\\u00b4", + " nf-core/sarek [VERSION]", + "------------------------------------------------------", + "Input/output options", + " input : [PATH]/tests/csv/3.0/fastq_single.csv", + " outdir : [PATH]/tests/[NFT_HASH]/output", + "Main options", + " split_fastq : 0", + " intervals : https://raw.githubusercontent.com/nf-core/test-datasets/modules/data//genomics/homo_sapiens/genome/genome.interval_list", + " tools : strelka", + "Annotation", + " bcftools_annotations : https://raw.githubusercontent.com/nf-core/test-datasets/modules/data//genomics/sarscov2/illumina/vcf/test2.vcf.gz", + " bcftools_annotations_tbi : https://raw.githubusercontent.com/nf-core/test-datasets/modules/data//genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi", + " bcftools_header_lines : [PATH]/tests/config/bcfann_test_header.txt", + "General reference genome options", + " igenomes_base : https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/", + "Reference genome options", + " genome : testdata.nf-core.sarek", + " dbsnp : https://raw.githubusercontent.com/nf-core/test-datasets/modules/data//genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz", + " dbsnp_tbi : https://raw.githubusercontent.com/nf-core/test-datasets/modules/data//genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi", + " dbsnp_vqsr : --resource:dbsnp,known=false,training=true,truth=false,prior=2.0 dbsnp_146.hg38.vcf.gz", + " dict : https://raw.githubusercontent.com/nf-core/test-datasets/modules/data//genomics/homo_sapiens/genome/genome.dict", + " fasta : 
https://raw.githubusercontent.com/nf-core/test-datasets/modules/data//genomics/homo_sapiens/genome/genome.fasta", + " fasta_fai : https://raw.githubusercontent.com/nf-core/test-datasets/modules/data//genomics/homo_sapiens/genome/genome.fasta.fai", + " germline_resource : https://raw.githubusercontent.com/nf-core/test-datasets/modules/data//genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz", + " germline_resource_tbi : https://raw.githubusercontent.com/nf-core/test-datasets/modules/data//genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz.tbi", + " known_indels : https://raw.githubusercontent.com/nf-core/test-datasets/modules/data//genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz", + " known_indels_tbi : https://raw.githubusercontent.com/nf-core/test-datasets/modules/data//genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz.tbi", + " known_indels_vqsr : --resource:mills,known=false,training=true,truth=true,prior=10.0 mills_and_1000G.indels.vcf.gz", + " ngscheckmate_bed : https://raw.githubusercontent.com/nf-core/test-datasets/modules/data//genomics/homo_sapiens/genome/chr21/germlineresources/SNP_GRCh38_hg38_wChr.bed", + " sentieon_dnascope_model : s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38/Annotation/Sentieon/SentieonDNAscopeModel1.1.model", + " snpeff_db : WBcel235.99", + " vep_cache_version : 114", + " vep_genome : WBcel235", + " vep_species : caenorhabditis_elegans", + "Institutional config options", + " config_profile_name : Test profile", + " config_profile_description : Minimal test dataset to check pipeline function", + " modules_testdata_base_path : https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/", + "Generic options", + " pipelines_testdata_base_path: https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/", + " trace_report_suffix : [TIMESTAMP]", + "Core Nextflow options", + " runName : [RUN_NAME]", + " launchDir : [PATH]/tests/[NFT_HASH]", + " workDir : 
[PATH]/tests/[NFT_HASH]/work", + " projectDir : [PATH]", + " userName : [USER]", + " profile : test,[CONTAINER]", + " configFiles : [PATH]/nextflow.config, [PATH]/nextflow.config, [PATH]/tests/nextflow.config", + "!! Only displaying parameters that differ from the pipeline defaults !!", + "------------------------------------------------------", + "* The pipeline", + " https://doi.org/10.12688/f1000research.16665.2", + " https://doi.org/10.1093/nargab/lqae031", + " https://doi.org/10.5281/zenodo.3476425", + "* The nf-core framework", + " https://doi.org/10.1038/s41587-020-0439-x", + "* Software dependencies", + " https://github.com/nf-core/sarek/blob/master/CITATIONS.md", + "-[nf-core/sarek] Pipeline completed successfully-", + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1", + "[NXF_HASH] Submitted process > NFCORE_SAREK:PREPARE_GENOME:BWAMEM1_INDEX (genome.fasta)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:PREPARE_INTERVALS:CREATE_INTERVALS_BED (genome.interval_list)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:PREPARE_INTERVALS:GATK4_INTERVALLISTTOBED (genome)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:PREPARE_INTERVALS:TABIX_BGZIPTABIX_INTERVAL_COMBINED (genome)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:PREPARE_INTERVALS:TABIX_BGZIPTABIX_INTERVAL_SPLIT (chr22_1-40001)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_SINGLE_STRELKA:STRELKA_SINGLE (test)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:SAREK:CRAM_SAMPLEQC:CRAM_QC_RECAL:MOSDEPTH (test)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:SAREK:CRAM_SAMPLEQC:CRAM_QC_RECAL:SAMTOOLS_STATS (test)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:SAREK:FASTQC (test-test_L1)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:SAREK:FASTQC (test-test_L2)", + "[NXF_HASH] Submitted process > 
NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:BAM_APPLYBQSR:CRAM_MERGE_INDEX_SAMTOOLS:INDEX_CRAM (test)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:BAM_APPLYBQSR:GATK4_APPLYBQSR (test)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:BAM_BASERECALIBRATOR:GATK4_BASERECALIBRATOR (test)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:BAM_MARKDUPLICATES:CRAM_QC_MOSDEPTH_SAMTOOLS:MOSDEPTH (test)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:BAM_MARKDUPLICATES:CRAM_QC_MOSDEPTH_SAMTOOLS:SAMTOOLS_STATS (test)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:BAM_MARKDUPLICATES:GATK4_MARKDUPLICATES (test)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:FASTQ_ALIGN:BWAMEM1_MEM (test-test_L1)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:FASTQ_ALIGN:BWAMEM1_MEM (test-test_L2)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:SAREK:MULTIQC", + "[NXF_HASH] Submitted process > NFCORE_SAREK:SAREK:VCF_QC_BCFTOOLS_VCFTOOLS:BCFTOOLS_STATS (test)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:SAREK:VCF_QC_BCFTOOLS_VCFTOOLS:VCFTOOLS_SUMMARY (test)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:SAREK:VCF_QC_BCFTOOLS_VCFTOOLS:VCFTOOLS_TSTV_COUNT (test)", + "[NXF_HASH] Submitted process > NFCORE_SAREK:SAREK:VCF_QC_BCFTOOLS_VCFTOOLS:VCFTOOLS_TSTV_QUAL (test)" + ] + ], + "timestamp": "2025-12-15T19:44:11.568889909", + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + } + }, + "-profile test -stub": { + "content": [ + 23, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": 
"4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/.stub", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.cram", + "preprocessing/markduplicates/test/test.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/strelka", + "reports/bcftools/strelka/test", + "reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1.html", + "reports/fastqc/test-test_L1/test-test_L1.zip", + "reports/fastqc/test-test_L2", + "reports/fastqc/test-test_L2/test-test_L2.html", + "reports/fastqc/test-test_L2/test-test_L2.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + 
"reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.global.dist.txt", + "reports/mosdepth/test/test.md.per-base.bed.gz", + "reports/mosdepth/test/test.md.per-base.bed.gz.csi", + "reports/mosdepth/test/test.md.per-base.d4", + "reports/mosdepth/test/test.md.quantized.bed.gz", + "reports/mosdepth/test/test.md.quantized.bed.gz.csi", + "reports/mosdepth/test/test.md.region.dist.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.md.summary.txt", + "reports/mosdepth/test/test.md.thresholds.bed.gz", + "reports/mosdepth/test/test.md.thresholds.bed.gz.csi", + "reports/mosdepth/test/test.recal.global.dist.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + "reports/mosdepth/test/test.recal.per-base.d4", + "reports/mosdepth/test/test.recal.quantized.bed.gz", + "reports/mosdepth/test/test.recal.quantized.bed.gz.csi", + "reports/mosdepth/test/test.recal.region.dist.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.summary.txt", + "reports/mosdepth/test/test.recal.thresholds.bed.gz", + "reports/mosdepth/test/test.recal.thresholds.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/test", + "reports/vcftools/strelka/test/test.strelka.variants.012", + "reports/vcftools/strelka/test/test.strelka.variants.012.indv", + "reports/vcftools/strelka/test/test.strelka.variants.012.pos", + "reports/vcftools/strelka/test/test.strelka.variants.BEAGLE.GL", + "reports/vcftools/strelka/test/test.strelka.variants.BEAGLE.PL", + "reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary", + 
"reports/vcftools/strelka/test/test.strelka.variants.FORMAT", + "reports/vcftools/strelka/test/test.strelka.variants.INFO", + "reports/vcftools/strelka/test/test.strelka.variants.LROH", + "reports/vcftools/strelka/test/test.strelka.variants.Tajima.D", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.count", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.summary", + "reports/vcftools/strelka/test/test.strelka.variants.bcf", + "reports/vcftools/strelka/test/test.strelka.variants.diff.discordance.matrix", + "reports/vcftools/strelka/test/test.strelka.variants.diff.indv", + "reports/vcftools/strelka/test/test.strelka.variants.diff.indv_in_files", + "reports/vcftools/strelka/test/test.strelka.variants.diff.sites", + "reports/vcftools/strelka/test/test.strelka.variants.diff.sites_in_files", + "reports/vcftools/strelka/test/test.strelka.variants.diff.switch", + "reports/vcftools/strelka/test/test.strelka.variants.frq", + "reports/vcftools/strelka/test/test.strelka.variants.frq.count", + "reports/vcftools/strelka/test/test.strelka.variants.gdepth", + "reports/vcftools/strelka/test/test.strelka.variants.geno.chisq", + "reports/vcftools/strelka/test/test.strelka.variants.geno.ld", + "reports/vcftools/strelka/test/test.strelka.variants.hap.ld", + "reports/vcftools/strelka/test/test.strelka.variants.hapcount", + "reports/vcftools/strelka/test/test.strelka.variants.het", + "reports/vcftools/strelka/test/test.strelka.variants.hwe", + "reports/vcftools/strelka/test/test.strelka.variants.idepth", + "reports/vcftools/strelka/test/test.strelka.variants.ifreqburden", + "reports/vcftools/strelka/test/test.strelka.variants.imiss", + "reports/vcftools/strelka/test/test.strelka.variants.impute.hap", + "reports/vcftools/strelka/test/test.strelka.variants.impute.hap.indv", + 
"reports/vcftools/strelka/test/test.strelka.variants.impute.hap.legend", + "reports/vcftools/strelka/test/test.strelka.variants.indel.hist", + "reports/vcftools/strelka/test/test.strelka.variants.interchrom.geno.ld", + "reports/vcftools/strelka/test/test.strelka.variants.interchrom.hap.ld", + "reports/vcftools/strelka/test/test.strelka.variants.kept.sites", + "reports/vcftools/strelka/test/test.strelka.variants.ldepth", + "reports/vcftools/strelka/test/test.strelka.variants.ldepth.mean", + "reports/vcftools/strelka/test/test.strelka.variants.ldhat.locs", + "reports/vcftools/strelka/test/test.strelka.variants.ldhat.sites", + "reports/vcftools/strelka/test/test.strelka.variants.list.geno.ld", + "reports/vcftools/strelka/test/test.strelka.variants.list.hap.ld", + "reports/vcftools/strelka/test/test.strelka.variants.lmiss", + "reports/vcftools/strelka/test/test.strelka.variants.lqual", + "reports/vcftools/strelka/test/test.strelka.variants.map", + "reports/vcftools/strelka/test/test.strelka.variants.mendel", + "reports/vcftools/strelka/test/test.strelka.variants.ped", + "reports/vcftools/strelka/test/test.strelka.variants.relatedness", + "reports/vcftools/strelka/test/test.strelka.variants.relatedness2", + "reports/vcftools/strelka/test/test.strelka.variants.removed.sites", + "reports/vcftools/strelka/test/test.strelka.variants.singletons", + "reports/vcftools/strelka/test/test.strelka.variants.sites.pi", + "reports/vcftools/strelka/test/test.strelka.variants.snpden", + "reports/vcftools/strelka/test/test.strelka.variants.tfam", + "reports/vcftools/strelka/test/test.strelka.variants.tped", + "reports/vcftools/strelka/test/test.strelka.variants.vcf", + "reports/vcftools/strelka/test/test.strelka.variants.weir.fst", + "reports/vcftools/strelka/test/test.strelka.variants.windowed.pi", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/test", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz", + 
"variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1", + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." + ] + ], + "timestamp": "2026-04-08T12:07:08.143766252", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.2" + } + } +} \ No newline at end of file diff --git a/tests/fastp.nf.test b/tests/fastp.nf.test new file mode 100644 index 0000000000..beeb720ba6 --- /dev/null +++ b/tests/fastp.nf.test @@ -0,0 +1,51 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --save_trimmed", + params: [ + clip_r1: 1, + clip_r2: 1, + three_prime_clip_r1: 1, + three_prime_clip_r2: 1, + length_required: 50, + tools: null, + trim_fastq: true, + save_trimmed: true, + input: "${projectDir}/tests/csv/3.0/fastq_single.csv", + genome: 'testdata.nf-core.sarek' + ] + ], + [ + name: "-profile test --trim_fastq (no save)", + params: [ + tools: null, + trim_fastq: true, + input: "${projectDir}/tests/csv/3.0/fastq_single.csv", + genome: 'testdata.nf-core.sarek' + ] + ], + [ + name: "-profile test --split_fastq 150000", + params: [ + save_split_fastqs: true, + split_fastq: 150000, + tools: null, + modules_testdata_base_path: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/', + input: "${projectDir}/tests/csv/3.0/fastq_single.csv", + igenomes_base: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/', + genome: 'testdata.nf-core.sarek' + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, 
UTILS.getTest(scenario)) + } +} diff --git a/tests/fastp.nf.test.snap b/tests/fastp.nf.test.snap new file mode 100644 index 0000000000..4ed5c98628 --- /dev/null +++ b/tests/fastp.nf.test.snap @@ -0,0 +1,1087 @@ +{ + "-profile test --save_trimmed": { + "content": [ + 20, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTP": { + "fastp": "0.24.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/fastp-insert-size-plot.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_2_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_2_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_After_filtering.txt", + 
"multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_2_Before_filtering.txt", + "multiqc/multiqc_data/fastp_filtered_reads_plot.txt", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastp.txt", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + 
"multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/fastp-insert-size-plot.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_1_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_2_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_1_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_2_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_1_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_2_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp_filtered_reads_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastp_filtered_reads_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + 
"multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/fastp-insert-size-plot.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_1_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_2_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_1_After_filtering.png", + 
"multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_2_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_1_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_2_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp_filtered_reads_plot-cnt.png", + "multiqc/multiqc_plots/png/fastp_filtered_reads_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + 
"multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/fastp-insert-size-plot.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_1_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_2_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_1_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_2_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_1_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_2_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp_filtered_reads_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastp_filtered_reads_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + 
"multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/fastp", + "preprocessing/fastp/test", + "preprocessing/fastp/test/test-test_L1_1.fastp.fastq.gz", + "preprocessing/fastp/test/test-test_L1_2.fastp.fastq.gz", + "preprocessing/fastp/test/test-test_L2_1.fastp.fastq.gz", + "preprocessing/fastp/test/test-test_L2_2.fastp.fastq.gz", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + 
"preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/fastp", + "reports/fastp/test", + "reports/fastp/test/test-test_L1.fastp.html", + "reports/fastp/test/test-test_L1.fastp.json", + "reports/fastp/test/test-test_L1.fastp.log", + "reports/fastp/test/test-test_L2.fastp.html", + "reports/fastp/test/test-test_L2.fastp.json", + "reports/fastp/test/test-test_L2.fastp.log", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test-test_L2", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.zip", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats" + ], + [ + "fastp-insert-size-plot.txt:md5,7e4ef00f8f5204cbd6c11a9d51357d76", + 
"fastp-seq-content-gc-plot_Read_1_After_filtering.txt:md5,0f998ee8c520a016317c34207bc13f59", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.txt:md5,6bb5a2d67ad8458c4fd09e7426b33265", + "fastp-seq-content-gc-plot_Read_2_After_filtering.txt:md5,81070ec3e258d78d2fa8dab09b90a7c5", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.txt:md5,0ebbd32b93bc6163573df7e09553f1de", + "fastp-seq-content-n-plot_Read_1_After_filtering.txt:md5,1615e97c0bc3527b545a1e2444f97cdf", + "fastp-seq-content-n-plot_Read_1_Before_filtering.txt:md5,c183bfc42987ac6460f7deb3397912eb", + "fastp-seq-content-n-plot_Read_2_After_filtering.txt:md5,22802b62f6c8acb25c72a19177fe8073", + "fastp-seq-content-n-plot_Read_2_Before_filtering.txt:md5,bec133e008bc1df7fc9ceddce7743cc1", + "fastp-seq-quality-plot_Read_1_After_filtering.txt:md5,2843b5942be805c3c5403756d3b7ecf0", + "fastp-seq-quality-plot_Read_1_Before_filtering.txt:md5,6d3f0f8dfcf9d06b5170f25560f41381", + "fastp-seq-quality-plot_Read_2_After_filtering.txt:md5,922e2838b8db8cb7237202a91a116b0d", + "fastp-seq-quality-plot_Read_2_Before_filtering.txt:md5,6df6151fdceb012dbbd3d8ed241e052d", + "fastp_filtered_reads_plot.txt:md5,2bc65f301301334619990273c6126c5d", + "fastqc-status-check-heatmap.txt:md5,a020b9689ddeb4abec16b4854fe452f1", + "fastqc_adapter_content_plot.txt:md5,2e1b72be741319e7fadbbb39d7e5b37d", + "fastqc_per_base_n_content_plot.txt:md5,ad3b971a6bb4e8ba6c844c8a03584eb8", + "fastqc_per_base_sequence_quality_plot.txt:md5,1bc03889d243a944253ac637d81ae10c", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,2c42d140ce06c08dad2b58f397c23239", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,59e22821d350bfb97c37ffd9088f5ad9", + "fastqc_per_sequence_quality_scores_plot.txt:md5,f33615cc98bb6225f39545a415fa7c0f", + "fastqc_sequence_counts_plot.txt:md5,7f0f19a58e8e54e792a751fd04a9ae13", + "fastqc_sequence_duplication_levels_plot.txt:md5,92b02e250ff78725deb9a10d510fcecc", + 
"fastqc_sequence_length_distribution_plot.txt:md5,fb04dce68ec566314125bc9438211b28", + "mosdepth-coverage-per-contig-single.txt:md5,29af8f8744a88e2ec0486292d541559f", + "mosdepth-cumcoverage-dist-id.txt:md5,d97ac04e524b20bf9d261c009ac92b6a", + "mosdepth_perchrom.txt:md5,29af8f8744a88e2ec0486292d541559f", + "multiqc_citations.txt:md5,0e2971e7a873c92592112775fa99fb02", + "multiqc_fastp.txt:md5,2f22f6c961e432974f27a4e56d877cec", + "multiqc_fastqc.txt:md5,bde0d0bffa62228b33fb68b7e25b6ff8", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,58ea8c88565a359af678aa588e44895c", + "samtools_alignment_plot.txt:md5,aac925e42c9da438ab9f0fb7ae77e2fa", + "test-test_L1_1.fastp.fastq.gz:md5,f1a5c524cae7be9b5ca9a4138f847cfa", + "test-test_L1_2.fastp.fastq.gz:md5,e366994a1db55b5ed3cd12482f33cee7", + "test-test_L2_1.fastp.fastq.gz:md5,f1a5c524cae7be9b5ca9a4138f847cfa", + "test-test_L2_2.fastp.fastq.gz:md5,e366994a1db55b5ed3cd12482f33cee7", + "test-test_L1.fastp.json:md5,fb88c7b5807f6c7478b01baddf8ca4e8", + "test-test_L2.fastp.json:md5,708187bd90c12b1c8c3fa7046b69dc35", + "test.md.mosdepth.global.dist.txt:md5,b1c26e3381f220e65d683048ab6b6e2a", + "test.md.mosdepth.region.dist.txt:md5,02d51752367e753a6984c12f059499ba", + "test.md.mosdepth.summary.txt:md5,f18e776c3ee8e6947c3a69c136f54860", + "test.md.regions.bed.gz:md5,a3e8ccd3f04d3aee009a109d52b69920", + "test.md.regions.bed.gz.csi:md5,74d39fd1e463ce7b77610c96f5f57daf", + "test.recal.mosdepth.global.dist.txt:md5,b1c26e3381f220e65d683048ab6b6e2a", + "test.recal.mosdepth.region.dist.txt:md5,02d51752367e753a6984c12f059499ba", + "test.recal.mosdepth.summary.txt:md5,f18e776c3ee8e6947c3a69c136f54860", + 
"test.recal.regions.bed.gz:md5,a3e8ccd3f04d3aee009a109d52b69920", + "test.recal.regions.bed.gz.csi:md5,74d39fd1e463ce7b77610c96f5f57daf" + ], + "No BAM files", + [ + "test.md.cram:md5,9a65845bff32f674fd01e736743dd4b5", + "test.recal.cram:md5,6730f24d62a7bc9aa1e9fb0f89ae9cde" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-17T14:34:45.9737357" + }, + "-profile test --split_fastq 150000": { + "content": [ + 26, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTP": { + "fastp": "0.24.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/fastp-insert-size-plot.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_2_Before_filtering.txt", + 
"multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_2_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_2_Before_filtering.txt", + "multiqc/multiqc_data/fastp_filtered_reads_plot.txt", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + 
"multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastp.txt", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/fastp-insert-size-plot.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_1_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_2_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_1_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_2_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_1_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_2_After_filtering.pdf", + 
"multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp_filtered_reads_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastp_filtered_reads_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/fastp-insert-size-plot.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_1_After_filtering.png", + 
"multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_2_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_1_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_2_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_1_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_2_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp_filtered_reads_plot-cnt.png", + "multiqc/multiqc_plots/png/fastp_filtered_reads_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + 
"multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/fastp-insert-size-plot.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_1_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_2_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_1_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_2_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_1_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_2_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp_filtered_reads_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastp_filtered_reads_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + 
"multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/fastp", + "preprocessing/fastp/test", + "preprocessing/fastp/test/0001.test-test_L1_1.fastp.fastq.gz", + "preprocessing/fastp/test/0001.test-test_L1_2.fastp.fastq.gz", + "preprocessing/fastp/test/0001.test-test_L2_1.fastp.fastq.gz", + "preprocessing/fastp/test/0001.test-test_L2_2.fastp.fastq.gz", + 
"preprocessing/fastp/test/0002.test-test_L1_1.fastp.fastq.gz", + "preprocessing/fastp/test/0002.test-test_L1_2.fastp.fastq.gz", + "preprocessing/fastp/test/0002.test-test_L2_1.fastp.fastq.gz", + "preprocessing/fastp/test/0002.test-test_L2_2.fastp.fastq.gz", + "preprocessing/fastp/test/0003.test-test_L1_1.fastp.fastq.gz", + "preprocessing/fastp/test/0003.test-test_L1_2.fastp.fastq.gz", + "preprocessing/fastp/test/0003.test-test_L2_1.fastp.fastq.gz", + "preprocessing/fastp/test/0003.test-test_L2_2.fastp.fastq.gz", + "preprocessing/fastp/test/0004.test-test_L1_1.fastp.fastq.gz", + "preprocessing/fastp/test/0004.test-test_L1_2.fastp.fastq.gz", + "preprocessing/fastp/test/0004.test-test_L2_1.fastp.fastq.gz", + "preprocessing/fastp/test/0004.test-test_L2_2.fastp.fastq.gz", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/fastp", + "reports/fastp/test", + "reports/fastp/test/test-test_L1.fastp.html", + "reports/fastp/test/test-test_L1.fastp.json", + "reports/fastp/test/test-test_L1.fastp.log", + "reports/fastp/test/test-test_L2.fastp.html", + "reports/fastp/test/test-test_L2.fastp.json", + "reports/fastp/test/test-test_L2.fastp.log", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test-test_L2", + 
"reports/fastqc/test-test_L2/test-test_L2_1_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.zip", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats" + ], + [ + "fastp-insert-size-plot.txt:md5,36c1e87a700481bb9c17bfe3a3744f22", + "fastp-seq-content-gc-plot_Read_1_After_filtering.txt:md5,6bb5a2d67ad8458c4fd09e7426b33265", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.txt:md5,6bb5a2d67ad8458c4fd09e7426b33265", + "fastp-seq-content-gc-plot_Read_2_After_filtering.txt:md5,0ebbd32b93bc6163573df7e09553f1de", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.txt:md5,0ebbd32b93bc6163573df7e09553f1de", + "fastp-seq-content-n-plot_Read_1_After_filtering.txt:md5,c183bfc42987ac6460f7deb3397912eb", + "fastp-seq-content-n-plot_Read_1_Before_filtering.txt:md5,c183bfc42987ac6460f7deb3397912eb", + "fastp-seq-content-n-plot_Read_2_After_filtering.txt:md5,bec133e008bc1df7fc9ceddce7743cc1", + "fastp-seq-content-n-plot_Read_2_Before_filtering.txt:md5,bec133e008bc1df7fc9ceddce7743cc1", + 
"fastp-seq-quality-plot_Read_1_After_filtering.txt:md5,6d3f0f8dfcf9d06b5170f25560f41381", + "fastp-seq-quality-plot_Read_1_Before_filtering.txt:md5,6d3f0f8dfcf9d06b5170f25560f41381", + "fastp-seq-quality-plot_Read_2_After_filtering.txt:md5,6df6151fdceb012dbbd3d8ed241e052d", + "fastp-seq-quality-plot_Read_2_Before_filtering.txt:md5,6df6151fdceb012dbbd3d8ed241e052d", + "fastp_filtered_reads_plot.txt:md5,4db6ef902b0483a3301ba0e8c09f8ec8", + "fastqc-status-check-heatmap.txt:md5,a020b9689ddeb4abec16b4854fe452f1", + "fastqc_adapter_content_plot.txt:md5,2e1b72be741319e7fadbbb39d7e5b37d", + "fastqc_per_base_n_content_plot.txt:md5,ad3b971a6bb4e8ba6c844c8a03584eb8", + "fastqc_per_base_sequence_quality_plot.txt:md5,1bc03889d243a944253ac637d81ae10c", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,2c42d140ce06c08dad2b58f397c23239", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,59e22821d350bfb97c37ffd9088f5ad9", + "fastqc_per_sequence_quality_scores_plot.txt:md5,f33615cc98bb6225f39545a415fa7c0f", + "fastqc_sequence_counts_plot.txt:md5,7f0f19a58e8e54e792a751fd04a9ae13", + "fastqc_sequence_duplication_levels_plot.txt:md5,92b02e250ff78725deb9a10d510fcecc", + "fastqc_sequence_length_distribution_plot.txt:md5,fb04dce68ec566314125bc9438211b28", + "mosdepth-coverage-per-contig-single.txt:md5,b9304bb6dee33d25255e19693c0a4dd8", + "mosdepth-cumcoverage-dist-id.txt:md5,722e8c7583dd52b2c8d8bb923718c912", + "mosdepth_perchrom.txt:md5,b9304bb6dee33d25255e19693c0a4dd8", + "multiqc_citations.txt:md5,0e2971e7a873c92592112775fa99fb02", + "multiqc_fastp.txt:md5,758582c0fafe6e69ad8488eb612008de", + "multiqc_fastqc.txt:md5,bde0d0bffa62228b33fb68b7e25b6ff8", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,93ec69f6db74d9de7e4a549491ae2c73", + "samtools_alignment_plot.txt:md5,962b7dd27f48ec4bbb668c8c32cbea1b", + "0001.test-test_L1_1.fastp.fastq.gz:md5,f379a60cc2a41f39fa0c0d23edf6eb61", + "0001.test-test_L1_2.fastp.fastq.gz:md5,e54d912158088d98f8083854079e1f5c", + "0001.test-test_L2_1.fastp.fastq.gz:md5,f379a60cc2a41f39fa0c0d23edf6eb61", + "0001.test-test_L2_2.fastp.fastq.gz:md5,e54d912158088d98f8083854079e1f5c", + "0002.test-test_L1_1.fastp.fastq.gz:md5,366180a220f6b78c55b1b23b35dc7f96", + "0002.test-test_L1_2.fastp.fastq.gz:md5,66ca650f883997310858acac94c8d236", + "0002.test-test_L2_1.fastp.fastq.gz:md5,366180a220f6b78c55b1b23b35dc7f96", + "0002.test-test_L2_2.fastp.fastq.gz:md5,66ca650f883997310858acac94c8d236", + "0003.test-test_L1_1.fastp.fastq.gz:md5,46bc09415a26d50230163e613cb1324b", + "0003.test-test_L1_2.fastp.fastq.gz:md5,500404ef12d7c6f02eb3b5367f84a978", + "0003.test-test_L2_1.fastp.fastq.gz:md5,46bc09415a26d50230163e613cb1324b", + "0003.test-test_L2_2.fastp.fastq.gz:md5,500404ef12d7c6f02eb3b5367f84a978", + "0004.test-test_L1_1.fastp.fastq.gz:md5,f3b1d77aa32f019f3088810cac2605e9", + "0004.test-test_L1_2.fastp.fastq.gz:md5,e38c0529e6ba0abce221ac39c84263b0", + "0004.test-test_L2_1.fastp.fastq.gz:md5,f3b1d77aa32f019f3088810cac2605e9", + "0004.test-test_L2_2.fastp.fastq.gz:md5,e38c0529e6ba0abce221ac39c84263b0", + "test-test_L1.fastp.json:md5,2f72ca9fa3c55cfd4ff67a747f7c2e46", + "test-test_L2.fastp.json:md5,f8b183e541878de7b24e555e23e078f1", + "test.md.mosdepth.global.dist.txt:md5,e5d0c6bf323c32f5414bd48b90bb32fa", + "test.md.mosdepth.region.dist.txt:md5,5062f8b7bb536c9b77a68f4ccd2315c2", + "test.md.mosdepth.summary.txt:md5,455358d5943fd1e5f09853acff3e50b6", + "test.md.regions.bed.gz:md5,729091bfb7c08486c8d247f4629996e6", + "test.md.regions.bed.gz.csi:md5,295eb71f59c0e37b5c3cf289a467f97e", + 
"test.recal.mosdepth.global.dist.txt:md5,e5d0c6bf323c32f5414bd48b90bb32fa", + "test.recal.mosdepth.region.dist.txt:md5,5062f8b7bb536c9b77a68f4ccd2315c2", + "test.recal.mosdepth.summary.txt:md5,455358d5943fd1e5f09853acff3e50b6", + "test.recal.regions.bed.gz:md5,729091bfb7c08486c8d247f4629996e6", + "test.recal.regions.bed.gz.csi:md5,295eb71f59c0e37b5c3cf289a467f97e" + ], + "No BAM files", + [ + "test.md.cram:md5,109bcdc3a3c2720d46cb85268920f85c", + "test.recal.cram:md5,f89c35bd0e96b2254e36b4e3660b1269" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-06T15:10:32.391037" + }, + "-profile test --trim_fastq (no save)": { + "content": [ + 20, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTP": { + "fastp": "0.24.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/fastp-insert-size-plot.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_After_filtering.txt", + 
"multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_2_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_2_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_2_Before_filtering.txt", + "multiqc/multiqc_data/fastp_filtered_reads_plot.txt", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + 
"multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastp.txt", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/fastp-insert-size-plot.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_1_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_2_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_1_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_2_After_filtering.pdf", + 
"multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_1_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_2_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp_filtered_reads_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastp_filtered_reads_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", 
+ "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/fastp-insert-size-plot.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_1_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_2_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_1_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_2_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_1_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_2_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp_filtered_reads_plot-cnt.png", + "multiqc/multiqc_plots/png/fastp_filtered_reads_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + 
"multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/fastp-insert-size-plot.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_1_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_2_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_1_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_2_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_1_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_2_After_filtering.svg", + 
"multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp_filtered_reads_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastp_filtered_reads_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", 
+ "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/fastp", + "reports/fastp/test", + "reports/fastp/test/test-test_L1.fastp.html", + "reports/fastp/test/test-test_L1.fastp.json", + "reports/fastp/test/test-test_L1.fastp.log", + "reports/fastp/test/test-test_L2.fastp.html", + "reports/fastp/test/test-test_L2.fastp.json", + "reports/fastp/test/test-test_L2.fastp.log", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test-test_L2", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.zip", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + 
"reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats" + ], + [ + "fastp-insert-size-plot.txt:md5,36c1e87a700481bb9c17bfe3a3744f22", + "fastp-seq-content-gc-plot_Read_1_After_filtering.txt:md5,ff88f2a81e44d1a7be1c54102a4bec44", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.txt:md5,6bb5a2d67ad8458c4fd09e7426b33265", + "fastp-seq-content-gc-plot_Read_2_After_filtering.txt:md5,8c96d1d0b2a649b0b65c1ad046669ab6", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.txt:md5,0ebbd32b93bc6163573df7e09553f1de", + "fastp-seq-content-n-plot_Read_1_After_filtering.txt:md5,c183bfc42987ac6460f7deb3397912eb", + "fastp-seq-content-n-plot_Read_1_Before_filtering.txt:md5,c183bfc42987ac6460f7deb3397912eb", + "fastp-seq-content-n-plot_Read_2_After_filtering.txt:md5,bec133e008bc1df7fc9ceddce7743cc1", + "fastp-seq-content-n-plot_Read_2_Before_filtering.txt:md5,bec133e008bc1df7fc9ceddce7743cc1", + "fastp-seq-quality-plot_Read_1_After_filtering.txt:md5,d0e9ae3df87efc081a237a15683c3c31", + "fastp-seq-quality-plot_Read_1_Before_filtering.txt:md5,6d3f0f8dfcf9d06b5170f25560f41381", + "fastp-seq-quality-plot_Read_2_After_filtering.txt:md5,48b2b08083ab8f2562b13578ed53be1d", + "fastp-seq-quality-plot_Read_2_Before_filtering.txt:md5,6df6151fdceb012dbbd3d8ed241e052d", + "fastp_filtered_reads_plot.txt:md5,4db6ef902b0483a3301ba0e8c09f8ec8", + "fastqc-status-check-heatmap.txt:md5,a020b9689ddeb4abec16b4854fe452f1", + "fastqc_adapter_content_plot.txt:md5,2e1b72be741319e7fadbbb39d7e5b37d", + "fastqc_per_base_n_content_plot.txt:md5,ad3b971a6bb4e8ba6c844c8a03584eb8", + "fastqc_per_base_sequence_quality_plot.txt:md5,1bc03889d243a944253ac637d81ae10c", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,2c42d140ce06c08dad2b58f397c23239", + 
"fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,59e22821d350bfb97c37ffd9088f5ad9", + "fastqc_per_sequence_quality_scores_plot.txt:md5,f33615cc98bb6225f39545a415fa7c0f", + "fastqc_sequence_counts_plot.txt:md5,7f0f19a58e8e54e792a751fd04a9ae13", + "fastqc_sequence_duplication_levels_plot.txt:md5,92b02e250ff78725deb9a10d510fcecc", + "fastqc_sequence_length_distribution_plot.txt:md5,fb04dce68ec566314125bc9438211b28", + "mosdepth-coverage-per-contig-single.txt:md5,a0990e98cdd2cd5540d07f504f516ccf", + "mosdepth-cumcoverage-dist-id.txt:md5,caee7b9e5d1a451970f87d791c3e450b", + "mosdepth_perchrom.txt:md5,a0990e98cdd2cd5540d07f504f516ccf", + "multiqc_citations.txt:md5,0e2971e7a873c92592112775fa99fb02", + "multiqc_fastp.txt:md5,2e4305ae77790e1948c19acc778b508c", + "multiqc_fastqc.txt:md5,bde0d0bffa62228b33fb68b7e25b6ff8", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,ae7954084b2cd708c5e7369606bf4208", + "samtools_alignment_plot.txt:md5,438e719bf574a46726dbd2e0f1442e42", + "test-test_L1.fastp.json:md5,ced0232bc3be313b392a9f59ff970cc2", + "test-test_L2.fastp.json:md5,d05b922b85ecd0e087ba05af543f8ff8", + "test.md.mosdepth.global.dist.txt:md5,ef7c375ae07aec5540f9892b9b556b73", + "test.md.mosdepth.region.dist.txt:md5,212efff2213f6fc1c3204daf68bbb8c8", + "test.md.mosdepth.summary.txt:md5,72114393647ff64503522760218b30f0", + "test.md.regions.bed.gz:md5,985db429051ddcd5eae177da6fb55ad6", + "test.md.regions.bed.gz.csi:md5,3fa0f8272fefafe3cd840376d34a94a2", + "test.recal.mosdepth.global.dist.txt:md5,ef7c375ae07aec5540f9892b9b556b73", + "test.recal.mosdepth.region.dist.txt:md5,212efff2213f6fc1c3204daf68bbb8c8", + 
"test.recal.mosdepth.summary.txt:md5,72114393647ff64503522760218b30f0", + "test.recal.regions.bed.gz:md5,985db429051ddcd5eae177da6fb55ad6", + "test.recal.regions.bed.gz.csi:md5,3fa0f8272fefafe3cd840376d34a94a2" + ], + "No BAM files", + [ + "test.md.cram:md5,9d2d75316943563c93666ab23233656c", + "test.recal.cram:md5,dbd6f40b1e6d72501dc034e62e9d54eb" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-06T15:02:50.499879" + } +} \ No newline at end of file diff --git a/tests/intervals.nf.test b/tests/intervals.nf.test new file mode 100644 index 0000000000..d2790e40b2 --- /dev/null +++ b/tests/intervals.nf.test @@ -0,0 +1,72 @@ +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --intervals genome.multi_intervals.bed --tools null --skip_tools baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools", + params: [ + intervals: modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.multi_intervals.bed", + nucleotides_per_second: 20, + skip_tools: 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools', + tools: null, + wes: true + ] + ], + [ + name: "-profile test --no_intervals --tools null --skip_tools baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools", + params: [ + no_intervals: true, + skip_tools: 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools', + tools: null + ] + ], + [ + name: "-profile test --intervals false --save_reference --tools null --skip_tools baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools", + params: [ + intervals: false, + save_reference: true, + skip_tools: 
'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools', + tools: null + ] + ], + [ + name: "-profile test --intervals genome.multi_intervals.bed --tools null --skip_tools baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools -stub", + params: [ + intervals: modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.multi_intervals.bed", // base path already ends with '/' + nucleotides_per_second: 20, + skip_tools: 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools', + tools: null, + wes: true + ], + stub: true + ], + [ + name: "-profile test --no_intervals --tools null --skip_tools baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools -stub", + params: [ + no_intervals: true, + skip_tools: 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools', + tools: null + ], + stub: true + ], + [ + name: "-profile test --intervals false --save_reference --tools null --skip_tools baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools -stub", + params: [ + intervals: false, + save_reference: true, + skip_tools: 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools', + tools: null + ], + stub: true + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/intervals.nf.test.snap b/tests/intervals.nf.test.snap new file mode 100644 index 0000000000..2e2c45a879 --- /dev/null +++ b/tests/intervals.nf.test.snap @@ -0,0 +1,374 @@ +{ + "-profile test --intervals false --save_reference --tools null --skip_tools baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools": { + "content": [ + 10, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "BUILD_INTERVALS": { + "gawk": "5.3.0" + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_BAM": 
{ + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "reference", + "reference/bwa", + "reference/bwa/genome.amb", + "reference/bwa/genome.ann", + "reference/bwa/genome.bwt", + "reference/bwa/genome.pac", + "reference/bwa/genome.sa", + "reference/intervals", + "reference/intervals/chr22_1-40001.bed", + "reference/intervals/chr22_1-40001.bed.gz", + "reference/intervals/fai.bed.gz" + ], + [ + "genome.amb:md5,1891c1de381b3a96d4e72f590fde20c1", + "genome.ann:md5,2df4aa2d7580639fa0fcdbcad5e2e969", + "genome.bwt:md5,815eded87e4cb6b0f1daab5c4d6e30af", + "genome.pac:md5,8569fbdb2c98c6fb16dfa73d8eacb070", + "genome.sa:md5,e7cff62b919448a3a3d0fe4aaf427594", + "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "fai.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ], + "No BAM files", + [ + "test.sorted.cram:md5,5534c350547fd253f0f2b9450362bed" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T19:53:53.927423365" + }, + "-profile test --intervals genome.multi_intervals.bed --tools null --skip_tools baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools": { + "content": [ + 10, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + 
"samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_BAM": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "reference" + ], + "No stable content", + "No BAM files", + [ + "test.sorted.cram:md5,5534c350547fd253f0f2b9450362bed" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T19:51:26.151477364" + }, + "-profile test --no_intervals --tools null --skip_tools baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools": { + "content": [ + 7, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_BAM": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "reference" + ], + "No stable content", + "No BAM files", + [ + 
"test.sorted.cram:md5,5534c350547fd253f0f2b9450362bed" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T19:52:40.407831822" + }, + "-profile test --no_intervals --tools null --skip_tools baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools -stub": { + "content": [ + 7, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_BAM": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "reference" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1", + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." 
+ ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T19:55:39.716558447" + }, + "-profile test --intervals false --save_reference --tools null --skip_tools baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools -stub": { + "content": [ + 10, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "BUILD_INTERVALS": { + "gawk": "5.3.0" + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_BAM": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "reference", + "reference/bwa", + "reference/bwa/genome.amb", + "reference/bwa/genome.ann", + "reference/bwa/genome.bwt", + "reference/bwa/genome.pac", + "reference/bwa/genome.sa", + "reference/intervals", + "reference/intervals/fai.bed.gz", + "reference/intervals/fai.stub.bed", + "reference/intervals/fai.stub.bed.gz" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1", + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." 
+ ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T19:56:30.038317094" + }, + "-profile test --intervals genome.multi_intervals.bed --tools null --skip_tools baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools -stub": { + "content": [ + 9, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_BAM": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "reference" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1", + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." 
+ ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T19:54:48.88067349" + } +} diff --git a/tests/joint_calling_haplotypecaller.nf.test b/tests/joint_calling_haplotypecaller.nf.test new file mode 100644 index 0000000000..0866b0d7a5 --- /dev/null +++ b/tests/joint_calling_haplotypecaller.nf.test @@ -0,0 +1,64 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --input tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --joint_germline --dbsnp_vqsr false --known_snps_vqsr false --known_indels_vqsr false --nucleotides_per_second 20", + params: [ + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/mapped_joint_bam.csv", + step: "variant_calling", + tools: 'haplotypecaller', + joint_germline: true, + dbsnp_vqsr: false, + known_snps_vqsr: false, + known_indels_vqsr: false, + wes: true, + nucleotides_per_second: 20 + ] + ], + [ + name: "-profile test --input tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --joint_germline --dbsnp_vqsr false --known_snps_vqsr false --known_indels_vqsr false --nucleotides_per_second 100", + params: [ + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/mapped_joint_bam.csv", + step: "variant_calling", + tools: 'haplotypecaller', + joint_germline: true, + dbsnp_vqsr: false, + known_snps_vqsr: false, + known_indels_vqsr: false, + wes: true, + nucleotides_per_second: 100 + ] + ], + [ + name: "Fails with --no_intervals and --joint_germline for haplotypecaller", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_joint_bam.csv", + step: "variant_calling", + tools: 'haplotypecaller', + 
joint_germline: true, + no_intervals: true, + dbsnp_vqsr: false, + known_snps_vqsr: false, + known_indels_vqsr: false, + wes: true, + nucleotides_per_second: 20 + ], + failure: true, + snapshot: 'stderr,stdout', + snapshot_include: 'requires intervals' + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/joint_calling_haplotypecaller.nf.test.snap b/tests/joint_calling_haplotypecaller.nf.test.snap new file mode 100644 index 0000000000..7236ddc486 --- /dev/null +++ b/tests/joint_calling_haplotypecaller.nf.test.snap @@ -0,0 +1,453 @@ +{ + "-profile test --input tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --joint_germline --dbsnp_vqsr false --known_snps_vqsr false --known_indels_vqsr false --nucleotides_per_second 20": { + "content": [ + 26, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_GENOMICSDBIMPORT": { + "gatk4": "4.6.1.0" + }, + "GATK4_GENOTYPEGVCFS": { + "gatk4": "4.6.1.0" + }, + "GATK4_HAPLOTYPECALLER": { + "gatk4": "4.6.1.0" + }, + "MERGE_HAPLOTYPECALLER": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + 
"multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + 
"multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + 
"multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/haplotypecaller", + "reports/bcftools/haplotypecaller/joint_variant_calling", + "reports/bcftools/haplotypecaller/joint_variant_calling/joint_germline.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/testN", + "reports/mosdepth/testN/testN.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testN/testN.recal.mosdepth.region.dist.txt", + "reports/mosdepth/testN/testN.recal.mosdepth.summary.txt", + "reports/mosdepth/testN/testN.recal.per-base.bed.gz", + "reports/mosdepth/testN/testN.recal.per-base.bed.gz.csi", + "reports/mosdepth/testN/testN.recal.regions.bed.gz", + "reports/mosdepth/testN/testN.recal.regions.bed.gz.csi", + "reports/mosdepth/testT", + "reports/mosdepth/testT/testT.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.region.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.summary.txt", + "reports/mosdepth/testT/testT.recal.per-base.bed.gz", + "reports/mosdepth/testT/testT.recal.per-base.bed.gz.csi", + "reports/mosdepth/testT/testT.recal.regions.bed.gz", + "reports/mosdepth/testT/testT.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/testN", + "reports/samtools/testN/testN.recal.cram.stats", + "reports/samtools/testT", + 
"reports/samtools/testT/testT.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/haplotypecaller", + "reports/vcftools/haplotypecaller/joint_variant_calling", + "reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.FILTER.summary", + "reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.TsTv.count", + "reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.TsTv.qual", + "variant_calling", + "variant_calling/haplotypecaller", + "variant_calling/haplotypecaller/joint_variant_calling", + "variant_calling/haplotypecaller/joint_variant_calling/joint_germline.vcf.gz", + "variant_calling/haplotypecaller/joint_variant_calling/joint_germline.vcf.gz.tbi", + "variant_calling/haplotypecaller/testN", + "variant_calling/haplotypecaller/testN/testN.haplotypecaller.g.vcf.gz", + "variant_calling/haplotypecaller/testN/testN.haplotypecaller.g.vcf.gz.tbi", + "variant_calling/haplotypecaller/testT", + "variant_calling/haplotypecaller/testT/testT.haplotypecaller.g.vcf.gz", + "variant_calling/haplotypecaller/testT/testT.haplotypecaller.g.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,a9d61b7a7c54a27857b03a1c51ba6ff5", + "mosdepth-cumcoverage-dist-id.txt:md5,46e5004d6eb0d4f451706dcbb7acc2ed", + "mosdepth_perchrom.txt:md5,a9d61b7a7c54a27857b03a1c51ba6ff5", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,a0c101497a0566f60b9e8388207944ab", + "samtools_alignment_plot.txt:md5,563784066de81f3bc8ccf5fbbe82b3d5", + "joint_germline.bcftools_stats.txt:md5,1e0bcb3e7dc0e812371e4609f477569c", + "testN.recal.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e", + "testN.recal.mosdepth.region.dist.txt:md5,3a2030e5e8af7bc12720c3a5592bf921", + "testN.recal.mosdepth.summary.txt:md5,615c5c5019d88045a9ff5bbe6e63d270", + "testN.recal.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa", + "testN.recal.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4", + 
"testN.recal.regions.bed.gz:md5,0c8215fbea7b0bf7aba9d1781575f905", + "testN.recal.regions.bed.gz.csi:md5,5c00a1d457c387d6e71848a6d897e309", + "testT.recal.mosdepth.global.dist.txt:md5,ba97ed85645f77da6f3adad138b3cdb4", + "testT.recal.mosdepth.region.dist.txt:md5,a7eb835371dd0aaf347ccca7ebe1eb3b", + "testT.recal.mosdepth.summary.txt:md5,a937108cbf24c1430b79c861234ce22b", + "testT.recal.per-base.bed.gz:md5,fde70b7a0caef4460692540b97b3fd52", + "testT.recal.per-base.bed.gz.csi:md5,7f62d96cdff1ce2acc0c7d2d0549cb1b", + "testT.recal.regions.bed.gz:md5,8f0d545a0950c6a53225abec38553f6f", + "testT.recal.regions.bed.gz.csi:md5,5c00a1d457c387d6e71848a6d897e309", + "joint_germline.FILTER.summary:md5,2a4eb7abfb2e64e45d53fdda17530b7f", + "joint_germline.TsTv.count:md5,949fa16c755189c23a37f0ea8ecd1b26" + ], + "No BAM files", + "No CRAM files", + [ + "joint_germline.vcf.gz:md5,3551d1de0abc899d2bbd0cbe1336f832", + "testN.haplotypecaller.g.vcf.gz:md5,44590f25a9ed621929879cefc601f95a", + "testT.haplotypecaller.g.vcf.gz:md5,e37d210dda0bfe2716502353328b1103" + ], + [ + "WARN: If GATK's Haplotypecaller, Sentieon's Dnascope and/or Sentieon's Haplotyper is specified, but without `--dbsnp`, `--known_snps`, `--known_indels` or the associated resource labels (ie `known_snps_vqsr`), no variant recalibration will be done. For recalibration you must provide all of these resources." + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T19:58:09.342469278" + }, + "Fails with --no_intervals and --joint_germline for haplotypecaller": { + "content": [ + [ + "pipeline_info" + ], + "No stable content", + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings", + [ + "Joint germline variant calling with GATK's HaplotypeCaller requires intervals because GenomicsDB cannot be used without them. Please provide intervals or remove `--no_intervals`." 
+ ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-06T11:55:26.564281" + }, + "-profile test --input tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --joint_germline --dbsnp_vqsr false --known_snps_vqsr false --known_indels_vqsr false --nucleotides_per_second 100": { + "content": [ + 18, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_GENOMICSDBIMPORT": { + "gatk4": "4.6.1.0" + }, + "GATK4_GENOTYPEGVCFS": { + "gatk4": "4.6.1.0" + }, + "GATK4_HAPLOTYPECALLER": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + 
"multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + 
"multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + 
"pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/haplotypecaller", + "reports/bcftools/haplotypecaller/joint_variant_calling", + "reports/bcftools/haplotypecaller/joint_variant_calling/joint_germline.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/testN", + "reports/mosdepth/testN/testN.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testN/testN.recal.mosdepth.region.dist.txt", + "reports/mosdepth/testN/testN.recal.mosdepth.summary.txt", + "reports/mosdepth/testN/testN.recal.per-base.bed.gz", + "reports/mosdepth/testN/testN.recal.per-base.bed.gz.csi", + "reports/mosdepth/testN/testN.recal.regions.bed.gz", + "reports/mosdepth/testN/testN.recal.regions.bed.gz.csi", + "reports/mosdepth/testT", + "reports/mosdepth/testT/testT.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.region.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.summary.txt", + "reports/mosdepth/testT/testT.recal.per-base.bed.gz", + "reports/mosdepth/testT/testT.recal.per-base.bed.gz.csi", + "reports/mosdepth/testT/testT.recal.regions.bed.gz", + "reports/mosdepth/testT/testT.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/testN", + "reports/samtools/testN/testN.recal.cram.stats", + "reports/samtools/testT", + "reports/samtools/testT/testT.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/haplotypecaller", + "reports/vcftools/haplotypecaller/joint_variant_calling", + "reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.FILTER.summary", + "reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.TsTv.count", + "reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.TsTv.qual", + "variant_calling", + "variant_calling/haplotypecaller", + "variant_calling/haplotypecaller/joint_variant_calling", + "variant_calling/haplotypecaller/joint_variant_calling/joint_germline.vcf.gz", + 
"variant_calling/haplotypecaller/joint_variant_calling/joint_germline.vcf.gz.tbi", + "variant_calling/haplotypecaller/testN", + "variant_calling/haplotypecaller/testN/testN.haplotypecaller.g.vcf.gz", + "variant_calling/haplotypecaller/testN/testN.haplotypecaller.g.vcf.gz.tbi", + "variant_calling/haplotypecaller/testT", + "variant_calling/haplotypecaller/testT/testT.haplotypecaller.g.vcf.gz", + "variant_calling/haplotypecaller/testT/testT.haplotypecaller.g.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,a9d61b7a7c54a27857b03a1c51ba6ff5", + "mosdepth-cumcoverage-dist-id.txt:md5,46e5004d6eb0d4f451706dcbb7acc2ed", + "mosdepth_perchrom.txt:md5,a9d61b7a7c54a27857b03a1c51ba6ff5", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,a0c101497a0566f60b9e8388207944ab", + "samtools_alignment_plot.txt:md5,563784066de81f3bc8ccf5fbbe82b3d5", + "joint_germline.bcftools_stats.txt:md5,1e0bcb3e7dc0e812371e4609f477569c", + "testN.recal.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e", + "testN.recal.mosdepth.region.dist.txt:md5,3a2030e5e8af7bc12720c3a5592bf921", + "testN.recal.mosdepth.summary.txt:md5,615c5c5019d88045a9ff5bbe6e63d270", + "testN.recal.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa", + "testN.recal.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4", + "testN.recal.regions.bed.gz:md5,0c8215fbea7b0bf7aba9d1781575f905", + "testN.recal.regions.bed.gz.csi:md5,5c00a1d457c387d6e71848a6d897e309", + "testT.recal.mosdepth.global.dist.txt:md5,ba97ed85645f77da6f3adad138b3cdb4", + "testT.recal.mosdepth.region.dist.txt:md5,a7eb835371dd0aaf347ccca7ebe1eb3b", + "testT.recal.mosdepth.summary.txt:md5,a937108cbf24c1430b79c861234ce22b", + "testT.recal.per-base.bed.gz:md5,fde70b7a0caef4460692540b97b3fd52", + "testT.recal.per-base.bed.gz.csi:md5,7f62d96cdff1ce2acc0c7d2d0549cb1b", + "testT.recal.regions.bed.gz:md5,8f0d545a0950c6a53225abec38553f6f", + 
"testT.recal.regions.bed.gz.csi:md5,5c00a1d457c387d6e71848a6d897e309", + "joint_germline.FILTER.summary:md5,2a4eb7abfb2e64e45d53fdda17530b7f", + "joint_germline.TsTv.count:md5,949fa16c755189c23a37f0ea8ecd1b26" + ], + "No BAM files", + "No CRAM files", + [ + "joint_germline.vcf.gz:md5,3551d1de0abc899d2bbd0cbe1336f832", + "testN.haplotypecaller.g.vcf.gz:md5,44590f25a9ed621929879cefc601f95a", + "testT.haplotypecaller.g.vcf.gz:md5,e37d210dda0bfe2716502353328b1103" + ], + [ + "WARN: If GATK's Haplotypecaller, Sentieon's Dnascope and/or Sentieon's Haplotyper is specified, but without `--dbsnp`, `--known_snps`, `--known_indels` or the associated resource labels (ie `known_snps_vqsr`), no variant recalibration will be done. For recalibration you must provide all of these resources." + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T19:59:28.831246669" + } +} diff --git a/tests/joint_calling_mutect2.nf.test b/tests/joint_calling_mutect2.nf.test new file mode 100644 index 0000000000..44c1528f3d --- /dev/null +++ b/tests/joint_calling_mutect2.nf.test @@ -0,0 +1,50 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + profile "test,mutect" + + def test_scenario = [ + [ + name: "-profile test --input tests/csv/3.0/recalibrated_somatic_joint.csv --tools mutect2 --joint_mutect2", + params: [ + genome: null, + igenomes_ignore: true, + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 
'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic_joint.csv", + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + joint_mutect2: true, + step: "variant_calling", + tools: 'mutect2', + wes: true + ] + ], + [ + name: "-profile test --input tests/csv/3.0/recalibrated_tumoronly_joint.csv --tools mutect2 --joint_mutect2", + params: [ + genome: null, + igenomes_ignore: true, + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: "${projectDir}/tests/csv/3.0/recalibrated_tumoronly_joint.csv", + joint_mutect2: true, + step: "variant_calling", + tools: 'mutect2', + wes: true + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/joint_calling_mutect2.nf.test.snap b/tests/joint_calling_mutect2.nf.test.snap new file mode 100644 index 0000000000..1ffb84ab3c --- /dev/null +++ b/tests/joint_calling_mutect2.nf.test.snap @@ -0,0 +1,438 @@ +{ + "-profile test --input tests/csv/3.0/recalibrated_somatic_joint.csv --tools mutect2 --joint_mutect2": { + "content": [ + 17, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "LEARNREADORIENTATIONMODEL": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "MUTECT2_PAIRED": { + "gatk4": "4.6.1.0" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + 
"TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + 
"multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/mutect2", + "reports/bcftools/mutect2/test", + "reports/bcftools/mutect2/test/test.mutect2.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample1", + "reports/mosdepth/sample1/sample1.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.summary.txt", + "reports/mosdepth/sample1/sample1.recal.per-base.bed.gz", + "reports/mosdepth/sample1/sample1.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz.csi", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + 
"reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample1", + "reports/samtools/sample1/sample1.recal.cram.stats", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/mutect2", + "reports/vcftools/mutect2/test", + "reports/vcftools/mutect2/test/test.mutect2.FILTER.summary", + "reports/vcftools/mutect2/test/test.mutect2.TsTv.count", + "reports/vcftools/mutect2/test/test.mutect2.TsTv.qual", + "variant_calling", + "variant_calling/mutect2", + "variant_calling/mutect2/test", + "variant_calling/mutect2/test/test.mutect2.artifactprior.tar.gz", + "variant_calling/mutect2/test/test.mutect2.vcf.gz", + "variant_calling/mutect2/test/test.mutect2.vcf.gz.stats", + "variant_calling/mutect2/test/test.mutect2.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,05b840db853703d42c6c0be360cb573e", + "mosdepth-cumcoverage-dist-id.txt:md5,f08ab9b4898020d43b2e0ff77212f5b2", + "mosdepth_perchrom.txt:md5,05b840db853703d42c6c0be360cb573e", + 
"multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,fe7c4ec200c94df7ff97a0e8cbec2f70", + "samtools_alignment_plot.txt:md5,82d6fb277fbbbd221a92ebda4567d9d9", + "test.mutect2.bcftools_stats.txt:md5,45096ddcb6f27a59224b19a8a7c6b1ac", + "sample1.recal.mosdepth.global.dist.txt:md5,69e29702ef01fd8f6c7a5468fc35a16a", + "sample1.recal.mosdepth.region.dist.txt:md5,6ec49cd7d510c2eb3d9d90fdb79b783a", + "sample1.recal.mosdepth.summary.txt:md5,103098d0bf76ed82d2b87d5f242b099a", + "sample1.recal.per-base.bed.gz:md5,297f96648928d0ca5184223fb9941e7c", + "sample1.recal.per-base.bed.gz.csi:md5,c67dcd711b096eb42f43784d5eadbc0d", + "sample1.recal.regions.bed.gz:md5,314ce8d7273eff353072108aa77c327c", + "sample1.recal.regions.bed.gz.csi:md5,9cb0ad7039a3b703d16ca7d5b835c0ee", + "sample2.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample2.recal.mosdepth.region.dist.txt:md5,39005ffaac22871ffaaf19656fe69c5b", + "sample2.recal.mosdepth.summary.txt:md5,68d4b98f17361fddf73052ead34fa370", + "sample2.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample2.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample2.recal.regions.bed.gz:md5,b7561bc56a955f7db0f11e67e2ec0386", + "sample2.recal.regions.bed.gz.csi:md5,393c2749068304d8545b501b9d4658e4", + "sample3.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample3.recal.mosdepth.region.dist.txt:md5,39005ffaac22871ffaaf19656fe69c5b", + "sample3.recal.mosdepth.summary.txt:md5,68d4b98f17361fddf73052ead34fa370", + "sample3.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample3.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample3.recal.regions.bed.gz:md5,b7561bc56a955f7db0f11e67e2ec0386", + "sample3.recal.regions.bed.gz.csi:md5,393c2749068304d8545b501b9d4658e4", + "test.mutect2.FILTER.summary:md5,19fd44ff1fbdfe422473bf1f5dc9cdb3", + 
"test.mutect2.TsTv.count:md5,aa51bde6080c015c6aa6c8254977dd11", + "test.mutect2.vcf.gz.stats:md5,6340205d9cc91fc3c2d243fc151c210b" + ], + "No BAM files", + "No CRAM files", + [ + "test.mutect2.vcf.gz:md5,593f6a3b49d8a4e44b1e2696883116c0" + ], + [ + "WARN: If Mutect2 is specified without a germline resource, no filtering will be done.", + "WARN: No Panel-of-normal was specified for Mutect2." + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T20:17:32.428293264" + }, + "-profile test --input tests/csv/3.0/recalibrated_tumoronly_joint.csv --tools mutect2 --joint_mutect2": { + "content": [ + 16, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "BUILD_INTERVALS": { + "gawk": "5.3.0" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "LEARNREADORIENTATIONMODEL": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "MUTECT2": { + "gatk4": "4.6.1.0" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + 
"multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + 
"multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + 
"multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/mutect2", + "reports/bcftools/mutect2/test", + "reports/bcftools/mutect2/test/test.mutect2.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/mutect2", + "reports/vcftools/mutect2/test", + "reports/vcftools/mutect2/test/test.mutect2.FILTER.summary", + "reports/vcftools/mutect2/test/test.mutect2.TsTv.count", + "reports/vcftools/mutect2/test/test.mutect2.TsTv.qual", + "variant_calling", + "variant_calling/mutect2", + "variant_calling/mutect2/test", + 
"variant_calling/mutect2/test/test.mutect2.artifactprior.tar.gz", + "variant_calling/mutect2/test/test.mutect2.vcf.gz", + "variant_calling/mutect2/test/test.mutect2.vcf.gz.stats", + "variant_calling/mutect2/test/test.mutect2.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,a14350b5c82288d538ea43992a65be99", + "mosdepth-cumcoverage-dist-id.txt:md5,0bd6d5120cacef0c90192267731031a5", + "mosdepth_perchrom.txt:md5,a14350b5c82288d538ea43992a65be99", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,e6019a625137ced6d426a503cd652559", + "samtools_alignment_plot.txt:md5,cb01fb538637d281195e46e557031c7a", + "test.mutect2.bcftools_stats.txt:md5,9114eb52e42ff6b8475e7a7773c21eee", + "sample2.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample2.recal.mosdepth.region.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample2.recal.mosdepth.summary.txt:md5,b0b47739dcafeeb1a9e6218b8abca1e0", + "sample2.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample2.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample2.recal.regions.bed.gz:md5,fb0efeba20ea272b7b709cf65246689e", + "sample2.recal.regions.bed.gz.csi:md5,e8452848671e9e5c147ff4cceee944af", + "sample3.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample3.recal.mosdepth.region.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample3.recal.mosdepth.summary.txt:md5,b0b47739dcafeeb1a9e6218b8abca1e0", + "sample3.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample3.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample3.recal.regions.bed.gz:md5,fb0efeba20ea272b7b709cf65246689e", + "sample3.recal.regions.bed.gz.csi:md5,e8452848671e9e5c147ff4cceee944af", + "test.mutect2.FILTER.summary:md5,b55562501b24e3cec53c22ffe0ce346f", + "test.mutect2.TsTv.count:md5,d06a480e205f2f894db2bb51846b4c39", + 
"test.mutect2.vcf.gz.stats:md5,bd9ff0f343dd60fdd2860fdeff617a51" + ], + "No BAM files", + "No CRAM files", + [ + "test.mutect2.vcf.gz:md5,e064ce0953bb07416f3bc8a60c761238" + ], + [ + "WARN: If Mutect2 is specified without a germline resource, no filtering will be done.", + "WARN: No Panel-of-normal was specified for Mutect2." + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T20:42:22.769087844" + } +} diff --git a/tests/lane_integer.nf.test b/tests/lane_integer.nf.test new file mode 100644 index 0000000000..3fe8d05d72 --- /dev/null +++ b/tests/lane_integer.nf.test @@ -0,0 +1,23 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --input tests/csv/3.0/fastq_single_integer_lane.csv --tools null --skip_tools baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_single_integer_lane.csv", + tools: null, + skip_tools: 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools' + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/lane_integer.nf.test.snap b/tests/lane_integer.nf.test.snap new file mode 100644 index 0000000000..fb7e0633d2 --- /dev/null +++ b/tests/lane_integer.nf.test.snap @@ -0,0 +1,65 @@ +{ + "-profile test --input tests/csv/3.0/fastq_single_integer_lane.csv --tools null --skip_tools baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools": { + "content": [ + 10, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + 
"MERGE_BAM": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "reference" + ], + "No stable content", + "No BAM files", + [ + "test.sorted.cram:md5,5534c350547fd253f0f2b9450362bed" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T20:43:47.875976457" + } +} diff --git a/tests/lib/UTILS.groovy b/tests/lib/UTILS.groovy new file mode 100644 index 0000000000..287f312e12 --- /dev/null +++ b/tests/lib/UTILS.groovy @@ -0,0 +1,192 @@ +// Helper functions for pipeline tests + +class UTILS { + + public static def getAssertions = { Map args -> + // Mandatory, as we always need an outdir + def outdir = args.outdir + + // Get scenario and extract all properties dynamically + def scenario = args.scenario ?: [:] + + // Pass down workflow for std capture + def workflow = args.workflow + + // These strings are not stable and should be ignored + def snapshot_ignore_list = [ + "Creating env using", + "Downloading plugin", + "Got an interrupted exception while taking agent result", + "Pulling Singularity image", + "Staging foreign file", + "Unable to resume cached task", + "Unable to stage foreign file", + ] + + // stable_name: All files + folders in ${outdir}/ with a stable name + def stable_name = getAllFilesFromDir(outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // 
stable_content: All files in ${outdir}/ with stable content + def stable_content = getAllFilesFromDir(outdir, ignoreFile: 'tests/.nftignore', ignore: [scenario.ignoreFiles ]) + // bam_files: All bam files, minus anything matching the scenario's ignoreFiles pattern + def bam_files = getAllFilesFromDir(outdir, include: ['**/*.bam'], ignore: [scenario.ignoreFiles ]) + // cram_files: All cram files, minus anything matching the scenario's ignoreFiles pattern + def cram_files = getAllFilesFromDir(outdir, include: ['**/*.cram'], ignore: [scenario.ignoreFiles ]) + // Fasta file for cram verification with nft-bam + def fasta_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + def fasta = fasta_base + 'genomics/homo_sapiens/genome/genome.fasta' + // txt_files: MuSE txt files + def txt_files = getAllFilesFromDir(outdir, include: ['**/*.MuSE.txt']) + // vcf_files: All vcf files, minus anything matching the scenario's ignoreFiles pattern + def vcf_files = getAllFilesFromDir(outdir, include: ['**/*.vcf{,.gz}'], ignore: [scenario.ignoreFiles ]) + // freebayes_unfiltered: vcf files from freebayes without quality filtering + def freebayes_unfiltered = getAllFilesFromDir(outdir, include: ['**/*.freebayes.vcf.gz']) + // varlociraptor_vcf: vcf files from varlociraptor (*.varlociraptor.vcf or *.varlociraptor.vcf.gz) + def varlociraptor_vcf = getAllFilesFromDir(outdir, include: ['**/*.varlociraptor.{vcf}{,.gz}']) + + def assertion = [] + + if (!scenario.failure) { + assertion.add(workflow.trace.succeeded().size()) + assertion.add(removeFromYamlMap("${outdir}/pipeline_info/nf_core_sarek_software_mqc_versions.yml", "Workflow")?: 'No versions') + } + + // The list of stable file/folder names is always asserted (at least pipeline_info/ is created) + assertion.add(stable_name) + + if (!scenario.stub) { + assertion.add(stable_content.isEmpty() ? 'No stable content' : stable_content) + assertion.add(bam_files.isEmpty() ? 'No BAM files' : bam_files.collect { file -> file.getName() + ":md5," + bam(file.toString()).readsMD5 }) + assertion.add(cram_files.isEmpty() ? 
'No CRAM files' : cram_files.collect { file -> file.getName() + ":md5," + cram(file.toString(), fasta).readsMD5 }) + if (scenario.include_muse_txt) { + // It will skip the first two lines of the txt file (readLines()[2..-1]) before computing the md5 + assertion.add(txt_files.isEmpty() ? 'No TXT files' : txt_files.collect{ file -> file.getName() + ":md5," + file.readLines()[2..-1].join('\n').md5() }) + } + if (scenario.include_freebayes_unfiltered) { + // It will only print the vcf summary to avoid differing md5sums because of small differences in QUAL score + assertion.add(freebayes_unfiltered.isEmpty() ? 'No Freebayes unfiltered VCF files' : freebayes_unfiltered.collect { file -> [ file.getName(), path(file.toString()).vcf.summary ] }) + } + if (scenario.no_vcf_md5sum) { + // Will print the summary instead of the md5sum for vcf files + assertion.add(vcf_files.isEmpty() ? 'No VCF files' : vcf_files.collect { file -> [ file.getName(), path(file.toString()).vcf.summary ] }) + } else { + assertion.add(vcf_files.isEmpty() ? 'No VCF files' : vcf_files.collect { file -> file.getName() + ":md5," + path(file.toString()).vcf.variantsMD5 }) + if (scenario.include_varlociraptor_vcf) { + // It will use the summary method to extract the vcf file content + assertion.add(varlociraptor_vcf.isEmpty() ? 
'No Varlociraptor VCF files' : varlociraptor_vcf.collect { file -> file.getName() + ":summary," + path(file.toString()).vcf.summary }) + } + } + } + + // If we have snapshot options in the scenario then we allow capturing either stderr, stdout or both + // With options to include specific strings + def workflow_std = [] + // Otherwise, we always capture stdout and stderr for any WARN message + // Both have additional possibilities to ignore some strings + def filter_args = [ignore: snapshot_ignore_list + (scenario.snapshot_ignore ?: [])] + + workflow_std = workflow.stderr + workflow.stdout + filter_args.include = ["WARN"] + + assertion.add(filterNextflowOutput(workflow_std, filter_args) ?: "No warnings") + + if (scenario.snapshot) { + workflow_std = scenario.snapshot.split(',') + .findAll { it in ['stderr', 'stdout'] } + .collect { workflow."$it" } + .flatten() + + filter_args.remove('include') + + if (scenario.snapshot_include) { + filter_args.include = [scenario.snapshot_include] + } + + assertion.add(filterNextflowOutput(workflow_std, filter_args) ?: "No content") + } + + return assertion + } + + public static def getTest = { scenario -> + // This function returns a closure that will be used to run the test and the assertion + // It will create tags or options based on the scenario + + return { + // If the test is for a gpu, we add the gpu tag + // Otherwise, we add the cpu tag + // If the test has no conda incompatibilities + // then we append "_conda" to the cpu/gpu tag + // If the test is for a stub, we add options -stub + // And we append "_stub" to the cpu/gpu tag + + // All options should be: + // gpu (this is the default for gpu) + // cpu (this is the default for tests without conda) + // gpu_conda (this should never happen) + // cpu_conda (this is the default for tests with conda compatibility) + // gpu_stub + // cpu_stub + // gpu_conda_stub (this should never happen) + // cpu_conda_stub + + tag "pipeline" + tag "pipeline_sarek" + + options "-output-dir 
${outputDir}${scenario.stub ? ' -stub' : ''}" + + if (scenario.gpu) { + tag "gpu${!scenario.no_conda ? '_conda' : ''}${scenario.stub ? '_stub' : ''}" + } + + if (!scenario.gpu) { + tag "cpu${!scenario.no_conda ? '_conda' : ''}${scenario.stub ? '_stub' : ''}" + } + + // If a tag is provided, add it to the test + if (scenario.tag) { + tag scenario.tag + } + + when { + params { + // Mandatory, as we always need an outdir + outdir = "${outputDir}" + // Apply scenario-specific params + scenario.params.each { key, value -> + delegate."$key" = value + } + } + } + + then { + // Assert failure/success, and fails early so we don't pollute console with massive diffs + if (scenario.failure) { + assert workflow.failed + } else { + assert workflow.success + } + assertAll( + { assert snapshot( + // All assertions based on the scenario + *UTILS.getAssertions( + outdir: params.outdir, + scenario: scenario, + workflow: workflow + ) + ).match() } + ) + } + cleanup { + if (System.getenv('NFT_CLEANUP') && !System.getenv('NFT_CLEANUP').equalsIgnoreCase('false')) { // a non-empty string is truthy in Groovy, so "false" must be excluded explicitly: unset, empty or "false" disables cleanup + println "" + println "CLEANUP" + println "Set NFT_CLEANUP to false to disable." 
+ println "The following folders will be deleted:" + println "- ${workDir}" + + new File("${workDir}").deleteDir() + } + } + } + } +} diff --git a/tests/multi_lane.nf.test b/tests/multi_lane.nf.test new file mode 100644 index 0000000000..92e983da65 --- /dev/null +++ b/tests/multi_lane.nf.test @@ -0,0 +1,33 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --input tests/csv/3.0/fastq_multi_lane.csv --save_mapped --skip_tools fastqc,mosdepth,multiqc,samtools --tools null", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_multi_lane.csv", + save_mapped: true, + skip_tools: 'fastqc,mosdepth,multiqc,samtools', // markduplicates and baserecalibrator are NOT skipped here: the snapshot expects their outputs + tools: null + ] + ], + [ + name: "-profile test --input tests/csv/3.0/fastq_umi_multi_lane.csv --umi_read_structure '+T 7M1S+T' --skip_tools baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools --tools null", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_umi_multi_lane.csv", + umi_read_structure: '+T 7M1S+T', + skip_tools: 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools', + tools: null + ] + ] + ] + + // Generate tests for each scenario (the scenario name doubles as the snapshot key) + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/multi_lane.nf.test.snap b/tests/multi_lane.nf.test.snap new file mode 100644 index 0000000000..9ef6d94bbb --- /dev/null +++ b/tests/multi_lane.nf.test.snap @@ -0,0 +1,225 @@ +{ + "-profile test --input tests/csv/3.0/fastq_multi_lane.csv --save_mapped --skip_tools fastqc,mosdepth,multiqc,samtools --tools null": { + "content": [ + 21, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_APPLYBQSR": { + 
"gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_BAM": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "preprocessing/mapped/test2", + "preprocessing/mapped/test2/test2.sorted.cram", + "preprocessing/mapped/test2/test2.sorted.cram.crai", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/markduplicates/test2", + "preprocessing/markduplicates/test2/test2.md.cram", + "preprocessing/markduplicates/test2/test2.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recal_table/test2", + "preprocessing/recal_table/test2/test2.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "preprocessing/recalibrated/test2", + "preprocessing/recalibrated/test2/test2.recal.cram", + "preprocessing/recalibrated/test2/test2.recal.cram.crai", + "reference", + "reports", + "reports/markduplicates", + "reports/markduplicates/test", + 
"reports/markduplicates/test/test.md.cram.metrics", + "reports/markduplicates/test2", + "reports/markduplicates/test2/test2.md.cram.metrics" + ], + "No stable content", + "No BAM files", + [ + "test.sorted.cram:md5,5534c350547fd253f0f2b9450362bed", + "test2.sorted.cram:md5,bac87cf9290577fd9a4def63e046031f", + "test.md.cram:md5,724c601c9daf019d356a53a7d5e1c8b1", + "test2.md.cram:md5,bac87cf9290577fd9a4def63e046031f", + "test.recal.cram:md5,dbd6f40b1e6d72501dc034e62e9d54eb", + "test2.recal.cram:md5,f4205ab086600ba2927e1468dc732976" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998513#1/1", + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-06T18:03:04.332516" + }, + "-profile test --input tests/csv/3.0/fastq_umi_multi_lane.csv --umi_read_structure '+T 7M1S+T' --skip_tools baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools --tools null": { + "content": [ + 25, + { + "BAM2FASTQ": { + "samtools": 1.21 + }, + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CALLUMICONSENSUS": { + "fgbio": "2.4.0" + }, + "CAT_FASTQ": { + "cat": 9.5 + }, + "COLLATE_FASTQ_MAP": { + "samtools": 1.21 + }, + "COLLATE_FASTQ_UNMAP": { + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQTOBAM": { + "fgbio": "2.4.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GROUPREADSBYUMI": { + "fgbio": "2.4.0" + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_CONSENSUS": { + "samtools": 1.21 + }, + "SAMTOOLS_MERGE_UNMAP": { + "samtools": 1.21 + }, + 
"SAMTOOLS_VIEW_MAP_MAP": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_MAP_UNMAP": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_UNMAP_MAP": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_UNMAP_UNMAP": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "preprocessing/umi", + "preprocessing/umi/test", + "preprocessing/umi/test/test_umi-consensus.bam", + "reference", + "reports", + "reports/umi", + "reports/umi/test_umi-grouped_histogram.txt" + ], + [ + "test_umi-grouped_histogram.txt:md5,47440eca0cc1e70be22fe2a2ce81dfbf" + ], + [ + "test_umi-consensus.bam:md5,122d60b069b76a217b6fe91d1765dae3" + ], + [ + "test.sorted.cram:md5,450f711661b0222e8496ea3140fe7fff" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2.umi_1.fastq.gz): Cannot extract flowcell ID from @811842/1", + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_umi_cleanname_1.fastq.gz): Cannot extract flowcell ID from @922332" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-11T11:53:37.119233" + } +} \ No newline at end of file diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 0000000000..c4354c6561 --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,29 @@ +/* +======================================================================================== + Nextflow config file for running nf-test tests +======================================================================================== +*/ 
+ +params { + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' +} + +aws.client.anonymous = true // fixes S3 access issues on self-hosted runners + +process { + withName: 'SENTIEON_VARCAL_SNP' { + // complains that training data is constant for MQRankSum, ReadPosRankSum, SOR + ext.args = { '--annotation QD --annotation FS --annotation DP --var_type SNP' } + } +} + +// NOTE This is how pipeline users will use Sentieon in real world use +env.SENTIEON_LICENSE = env('SENTIEON_LICSRVR_IP') ?: 'null' + +// NOTE This should only happen in GitHub actions or nf-core MegaTests +env.SENTIEON_AUTH_MECH = env('SENTIEON_AUTH_MECH') ?: 'null' +env.SENTIEON_AUTH_DATA = secrets.SENTIEON_AUTH_DATA ?: 'null' + +// NOTE This is how pipeline users will test out Sentieon with a license file +// nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat | base64 -w 0) diff --git a/tests/postprocess_concatenation.nf.test b/tests/postprocess_concatenation.nf.test new file mode 100644 index 0000000000..5f1b820b8a --- /dev/null +++ b/tests/postprocess_concatenation.nf.test @@ -0,0 +1,27 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --concatenate_vcfs --tools freebayes,strelka", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_joint_bam.csv", + step: 'variant_calling', + concatenate_vcfs: true, + tools: 'freebayes,strelka' + ], + include_freebayes_unfiltered: true, + ignoreFiles: '**/{testN.germline,testT.germline,*freebayes}.vcf{,.gz}', + no_conda: true + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/postprocess_concatenation.nf.test.snap b/tests/postprocess_concatenation.nf.test.snap new file mode 100644 
index 0000000000..b588ea66a8 --- /dev/null +++ b/tests/postprocess_concatenation.nf.test.snap @@ -0,0 +1,292 @@ +{ + "-profile test --concatenate_vcfs --tools freebayes,strelka": { + "content": [ + 53, + { + "ADD_INFO_TO_VCF": { + "gawk": "5.3.0" + }, + "BCFTOOLS_SORT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FREEBAYES": { + "freebayes": "1.3.10" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GERMLINE_VCFS_CONCAT": { + "bcftools": 1.21 + }, + "GERMLINE_VCFS_CONCAT_SORT": { + "bcftools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_EXT_VCF": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_VC_FREEBAYES": { + "tabix": 1.21 + }, + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, + "VCFLIB_VCFFILTER": { + "vcflib": "1.0.14" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + 
"multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + 
"multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + 
"multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/freebayes", + "reports/bcftools/freebayes/testN", + "reports/bcftools/freebayes/testN/testN.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/freebayes/testT", + "reports/bcftools/freebayes/testT/testT.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/strelka", + "reports/bcftools/strelka/testN", + "reports/bcftools/strelka/testN/testN.strelka.variants.bcftools_stats.txt", + "reports/bcftools/strelka/testT", + "reports/bcftools/strelka/testT/testT.strelka.variants.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/testN", + "reports/mosdepth/testN/testN.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testN/testN.recal.mosdepth.region.dist.txt", + "reports/mosdepth/testN/testN.recal.mosdepth.summary.txt", + "reports/mosdepth/testN/testN.recal.regions.bed.gz", + "reports/mosdepth/testN/testN.recal.regions.bed.gz.csi", + "reports/mosdepth/testT", + "reports/mosdepth/testT/testT.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.region.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.summary.txt", + "reports/mosdepth/testT/testT.recal.regions.bed.gz", + "reports/mosdepth/testT/testT.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/testN", + "reports/samtools/testN/testN.recal.cram.stats", + "reports/samtools/testT", + "reports/samtools/testT/testT.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/freebayes", + "reports/vcftools/freebayes/testN", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.FILTER.summary", + 
"reports/vcftools/freebayes/testN/testN.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.TsTv.qual", + "reports/vcftools/freebayes/testT", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.TsTv.qual", + "reports/vcftools/strelka", + "reports/vcftools/strelka/testN", + "reports/vcftools/strelka/testN/testN.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/testN/testN.strelka.variants.TsTv.count", + "reports/vcftools/strelka/testN/testN.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/testT", + "reports/vcftools/strelka/testT/testT.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/testT/testT.strelka.variants.TsTv.count", + "reports/vcftools/strelka/testT/testT.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/concat", + "variant_calling/concat/testN", + "variant_calling/concat/testN/testN.germline.vcf.gz", + "variant_calling/concat/testN/testN.germline.vcf.gz.tbi", + "variant_calling/concat/testT", + "variant_calling/concat/testT/testT.germline.vcf.gz", + "variant_calling/concat/testT/testT.germline.vcf.gz.tbi", + "variant_calling/freebayes", + "variant_calling/freebayes/testN", + "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/testN/testN.freebayes.vcf.gz", + "variant_calling/freebayes/testN/testN.freebayes.vcf.gz.tbi", + "variant_calling/freebayes/testT", + "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/testT/testT.freebayes.vcf.gz", + "variant_calling/freebayes/testT/testT.freebayes.vcf.gz.tbi", + "variant_calling/strelka", + "variant_calling/strelka/testN", + 
"variant_calling/strelka/testN/testN.strelka.genome.vcf.gz", + "variant_calling/strelka/testN/testN.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/testN/testN.strelka.variants.vcf.gz", + "variant_calling/strelka/testN/testN.strelka.variants.vcf.gz.tbi", + "variant_calling/strelka/testT", + "variant_calling/strelka/testT/testT.strelka.genome.vcf.gz", + "variant_calling/strelka/testT/testT.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/testT/testT.strelka.variants.vcf.gz", + "variant_calling/strelka/testT/testT.strelka.variants.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,9cfd38310d4da59c7242b2a7ae6bd651", + "mosdepth-cumcoverage-dist-id.txt:md5,f0177ed551b6ec930854d0d221904ec0", + "mosdepth_perchrom.txt:md5,9cfd38310d4da59c7242b2a7ae6bd651", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,a0c101497a0566f60b9e8388207944ab", + "samtools_alignment_plot.txt:md5,563784066de81f3bc8ccf5fbbe82b3d5", + "testN.freebayes.filtered.bcftools_stats.txt:md5,96c5e6b9e55f1bb67fca7886fb322b67", + "testT.freebayes.filtered.bcftools_stats.txt:md5,ce8b7afaace836b6a00bd8ccdc980500", + "testN.strelka.variants.bcftools_stats.txt:md5,f11c9e5e9820868809b5465970c7cc06", + "testT.strelka.variants.bcftools_stats.txt:md5,ac070b6cf4de7540a227265971244b31", + "testN.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "testN.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "testN.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "testN.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "testN.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "testT.recal.mosdepth.global.dist.txt:md5,3106c114529adc4231badeb3bb38b6d1", + "testT.recal.mosdepth.region.dist.txt:md5,ccf646922b05cb4759c4f89072be2b69", + "testT.recal.mosdepth.summary.txt:md5,024649a659caff330dfbef4ac3560542", + 
"testT.recal.regions.bed.gz:md5,14b36a2cf428840aab471f95cfbe399f", + "testT.recal.regions.bed.gz.csi:md5,0dc011f3344841dc14aa488da905e917", + "testN.freebayes.filtered.FILTER.summary:md5,e2f8e86fb18631c0bf6ccd7d9d4039c8", + "testN.freebayes.filtered.TsTv.count:md5,89562fef808b5c3db629682d36fd86fc", + "testT.freebayes.filtered.FILTER.summary:md5,4b25443c427fd27761017e4f3a556b29", + "testT.freebayes.filtered.TsTv.count:md5,803d74e40f7716202bae2a3a81c1ddfc", + "testN.strelka.variants.FILTER.summary:md5,39ff2cc8eb7495a14a6b76e0ab627027", + "testN.strelka.variants.TsTv.count:md5,ee7dafc8d941b8502a04a63dc3126fff", + "testT.strelka.variants.FILTER.summary:md5,ad4c8982a91cef84d0c1ff827a0ffcf3", + "testT.strelka.variants.TsTv.count:md5,a32d1781bd32b81ec18c6e0a191c0efe" + ], + "No BAM files", + "No CRAM files", + [ + [ + "testN.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=7, phased=false, phasedAutodetect=false]" + ], + [ + "testT.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=10, phased=false, phasedAutodetect=false]" + ] + ], + [ + "testN.freebayes.filtered.vcf.gz:md5,7a08d55c866e45d635cf378825ddba74", + "testT.freebayes.filtered.vcf.gz:md5,dc1b49ffa2401b2a5193a0527bf8cd0a", + "testN.strelka.genome.vcf.gz:md5,325ea84ccf1ec1cf7a6c5c4aae1c0d1", + "testN.strelka.variants.vcf.gz:md5,3a3b3e67614ec91f4f0b88999dff454b", + "testT.strelka.genome.vcf.gz:md5,ece653410947a3ace10296c2cc5ff095", + "testT.strelka.variants.vcf.gz:md5,b1cf29a73f6b423cf26ab1a71847c9b" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T11:06:26.54547" + } +} diff --git a/tests/postprocess_concatenation_normalization.nf.test b/tests/postprocess_concatenation_normalization.nf.test new file mode 100644 index 0000000000..eb42290608 --- /dev/null +++ b/tests/postprocess_concatenation_normalization.nf.test @@ -0,0 +1,42 @@ +def projectDir = new File('.').absolutePath + 
+nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --normalize_vcfs --concatenate_vcfs --tools freebayes,strelka", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_joint_bam.csv", + step: 'variant_calling', + normalize_vcfs: true, + concatenate_vcfs: true, + tools: 'freebayes,strelka' + ], + include_freebayes_unfiltered: true, + ignoreFiles: '**/{testN.germline,testT.germline,*freebayes,*freebayes.filtered.bcftools_filtered*}.vcf{,.gz}', + no_conda: true + ], + [ + name: "-profile test --filter_vcfs --normalize_vcfs --concatenate_vcfs --tools freebayes,strelka", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_joint_bam.csv", + step: 'variant_calling', + normalize_vcfs: true, + concatenate_vcfs: true, + filter_vcfs: true, + tools: 'freebayes,strelka' + ], + include_freebayes_unfiltered: true, + ignoreFiles: '**/{testN.germline,testT.germline,*freebayes,*freebayes.filtered.bcftools_filtered*}.vcf{,.gz}', + no_conda: true + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/postprocess_concatenation_normalization.nf.test.snap b/tests/postprocess_concatenation_normalization.nf.test.snap new file mode 100644 index 0000000000..37b39759ec --- /dev/null +++ b/tests/postprocess_concatenation_normalization.nf.test.snap @@ -0,0 +1,638 @@ +{ + "-profile test --filter_vcfs --normalize_vcfs --concatenate_vcfs --tools freebayes,strelka": { + "content": [ + 73, + { + "ADD_INFO_TO_VCF": { + "gawk": "5.3.0" + }, + "BCFTOOLS_SORT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FILTER_VCFS": { + "bcftools": 1.22 + }, + "FREEBAYES": { + "freebayes": "1.3.10" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GERMLINE_VCFS_CONCAT": { + "bcftools": 1.21 + }, + "GERMLINE_VCFS_CONCAT_SORT": { + "bcftools": 1.21 
+ }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_EXT_VCF": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_VC_FREEBAYES": { + "tabix": 1.21 + }, + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, + "VCFLIB_VCFFILTER": { + "vcflib": "1.0.14" + }, + "VCFS_NORM": { + "bcftools": 1.21 + }, + "VCFS_NORM_SORT": { + "bcftools": 1.21 + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + 
"multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + 
"multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/freebayes", + "reports/bcftools/freebayes/testN", + "reports/bcftools/freebayes/testN/testN.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/freebayes/testT", 
+ "reports/bcftools/freebayes/testT/testT.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/strelka", + "reports/bcftools/strelka/testN", + "reports/bcftools/strelka/testN/testN.strelka.variants.bcftools_stats.txt", + "reports/bcftools/strelka/testT", + "reports/bcftools/strelka/testT/testT.strelka.variants.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/testN", + "reports/mosdepth/testN/testN.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testN/testN.recal.mosdepth.region.dist.txt", + "reports/mosdepth/testN/testN.recal.mosdepth.summary.txt", + "reports/mosdepth/testN/testN.recal.regions.bed.gz", + "reports/mosdepth/testN/testN.recal.regions.bed.gz.csi", + "reports/mosdepth/testT", + "reports/mosdepth/testT/testT.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.region.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.summary.txt", + "reports/mosdepth/testT/testT.recal.regions.bed.gz", + "reports/mosdepth/testT/testT.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/testN", + "reports/samtools/testN/testN.recal.cram.stats", + "reports/samtools/testT", + "reports/samtools/testT/testT.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/freebayes", + "reports/vcftools/freebayes/testN", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.TsTv.qual", + "reports/vcftools/freebayes/testT", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.TsTv.qual", + "reports/vcftools/strelka", + "reports/vcftools/strelka/testN", + "reports/vcftools/strelka/testN/testN.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/testN/testN.strelka.variants.TsTv.count", + 
"reports/vcftools/strelka/testN/testN.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/testT", + "reports/vcftools/strelka/testT/testT.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/testT/testT.strelka.variants.TsTv.count", + "reports/vcftools/strelka/testT/testT.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/concat", + "variant_calling/concat/testN", + "variant_calling/concat/testN/testN.germline.vcf.gz", + "variant_calling/concat/testN/testN.germline.vcf.gz.tbi", + "variant_calling/concat/testT", + "variant_calling/concat/testT/testT.germline.vcf.gz", + "variant_calling/concat/testT/testT.germline.vcf.gz.tbi", + "variant_calling/filtered", + "variant_calling/filtered/testN", + "variant_calling/filtered/testN/testN.freebayes.filtered.bcftools_filtered.vcf.gz", + "variant_calling/filtered/testN/testN.freebayes.filtered.bcftools_filtered.vcf.gz.tbi", + "variant_calling/filtered/testN/testN.strelka.variants.bcftools_filtered.vcf.gz", + "variant_calling/filtered/testN/testN.strelka.variants.bcftools_filtered.vcf.gz.tbi", + "variant_calling/filtered/testT", + "variant_calling/filtered/testT/testT.freebayes.filtered.bcftools_filtered.vcf.gz", + "variant_calling/filtered/testT/testT.freebayes.filtered.bcftools_filtered.vcf.gz.tbi", + "variant_calling/filtered/testT/testT.strelka.variants.bcftools_filtered.vcf.gz", + "variant_calling/filtered/testT/testT.strelka.variants.bcftools_filtered.vcf.gz.tbi", + "variant_calling/freebayes", + "variant_calling/freebayes/testN", + "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/testN/testN.freebayes.vcf.gz", + "variant_calling/freebayes/testN/testN.freebayes.vcf.gz.tbi", + "variant_calling/freebayes/testT", + "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz.tbi", + 
"variant_calling/freebayes/testT/testT.freebayes.vcf.gz", + "variant_calling/freebayes/testT/testT.freebayes.vcf.gz.tbi", + "variant_calling/normalized", + "variant_calling/normalized/testN", + "variant_calling/normalized/testN/testN.freebayes.filtered.bcftools_filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/testN/testN.freebayes.filtered.bcftools_filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/testN/testN.strelka.variants.bcftools_filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/testN/testN.strelka.variants.bcftools_filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/testT", + "variant_calling/normalized/testT/testT.freebayes.filtered.bcftools_filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/testT/testT.freebayes.filtered.bcftools_filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/testT/testT.strelka.variants.bcftools_filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/testT/testT.strelka.variants.bcftools_filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/strelka", + "variant_calling/strelka/testN", + "variant_calling/strelka/testN/testN.strelka.genome.vcf.gz", + "variant_calling/strelka/testN/testN.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/testN/testN.strelka.variants.vcf.gz", + "variant_calling/strelka/testN/testN.strelka.variants.vcf.gz.tbi", + "variant_calling/strelka/testT", + "variant_calling/strelka/testT/testT.strelka.genome.vcf.gz", + "variant_calling/strelka/testT/testT.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/testT/testT.strelka.variants.vcf.gz", + "variant_calling/strelka/testT/testT.strelka.variants.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,9cfd38310d4da59c7242b2a7ae6bd651", + "mosdepth-cumcoverage-dist-id.txt:md5,f0177ed551b6ec930854d0d221904ec0", + "mosdepth_perchrom.txt:md5,9cfd38310d4da59c7242b2a7ae6bd651", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + 
"samtools-stats-dp.txt:md5,a0c101497a0566f60b9e8388207944ab", + "samtools_alignment_plot.txt:md5,563784066de81f3bc8ccf5fbbe82b3d5", + "testN.freebayes.filtered.bcftools_stats.txt:md5,96c5e6b9e55f1bb67fca7886fb322b67", + "testT.freebayes.filtered.bcftools_stats.txt:md5,ce8b7afaace836b6a00bd8ccdc980500", + "testN.strelka.variants.bcftools_stats.txt:md5,f11c9e5e9820868809b5465970c7cc06", + "testT.strelka.variants.bcftools_stats.txt:md5,ac070b6cf4de7540a227265971244b31", + "testN.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "testN.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "testN.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "testN.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "testN.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "testT.recal.mosdepth.global.dist.txt:md5,3106c114529adc4231badeb3bb38b6d1", + "testT.recal.mosdepth.region.dist.txt:md5,ccf646922b05cb4759c4f89072be2b69", + "testT.recal.mosdepth.summary.txt:md5,024649a659caff330dfbef4ac3560542", + "testT.recal.regions.bed.gz:md5,14b36a2cf428840aab471f95cfbe399f", + "testT.recal.regions.bed.gz.csi:md5,0dc011f3344841dc14aa488da905e917", + "testN.freebayes.filtered.FILTER.summary:md5,e2f8e86fb18631c0bf6ccd7d9d4039c8", + "testN.freebayes.filtered.TsTv.count:md5,89562fef808b5c3db629682d36fd86fc", + "testT.freebayes.filtered.FILTER.summary:md5,4b25443c427fd27761017e4f3a556b29", + "testT.freebayes.filtered.TsTv.count:md5,803d74e40f7716202bae2a3a81c1ddfc", + "testN.strelka.variants.FILTER.summary:md5,39ff2cc8eb7495a14a6b76e0ab627027", + "testN.strelka.variants.TsTv.count:md5,ee7dafc8d941b8502a04a63dc3126fff", + "testT.strelka.variants.FILTER.summary:md5,ad4c8982a91cef84d0c1ff827a0ffcf3", + "testT.strelka.variants.TsTv.count:md5,a32d1781bd32b81ec18c6e0a191c0efe" + ], + "No BAM files", + "No CRAM files", + [ + [ + "testN.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=7, 
phased=false, phasedAutodetect=false]" + ], + [ + "testT.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=10, phased=false, phasedAutodetect=false]" + ] + ], + [ + "testN.strelka.variants.bcftools_filtered.vcf.gz:md5,35a541d45222013a9887bbe1678f9444", + "testT.strelka.variants.bcftools_filtered.vcf.gz:md5,46757c1dd7a5c1f62b39d91281016521", + "testN.freebayes.filtered.vcf.gz:md5,7a08d55c866e45d635cf378825ddba74", + "testT.freebayes.filtered.vcf.gz:md5,dc1b49ffa2401b2a5193a0527bf8cd0a", + "testN.strelka.variants.bcftools_filtered.norm.sorted.vcf.gz:md5,c7d56b86abee34770566ea490a806772", + "testT.strelka.variants.bcftools_filtered.norm.sorted.vcf.gz:md5,4449c1d5d7aecee9ca710ed4650633ea", + "testN.strelka.genome.vcf.gz:md5,325ea84ccf1ec1cf7a6c5c4aae1c0d1", + "testN.strelka.variants.vcf.gz:md5,3a3b3e67614ec91f4f0b88999dff454b", + "testT.strelka.genome.vcf.gz:md5,ece653410947a3ace10296c2cc5ff095", + "testT.strelka.variants.vcf.gz:md5,b1cf29a73f6b423cf26ab1a71847c9b" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T10:48:21.505593" + }, + "-profile test --normalize_vcfs --concatenate_vcfs --tools freebayes,strelka": { + "content": [ + 69, + { + "ADD_INFO_TO_VCF": { + "gawk": "5.3.0" + }, + "BCFTOOLS_SORT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FREEBAYES": { + "freebayes": "1.3.10" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GERMLINE_VCFS_CONCAT": { + "bcftools": 1.21 + }, + "GERMLINE_VCFS_CONCAT_SORT": { + "bcftools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_EXT_VCF": { + "bgzip": "1.21", + 
"tabix": "1.21" + }, + "TABIX_VC_FREEBAYES": { + "tabix": 1.21 + }, + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, + "VCFLIB_VCFFILTER": { + "vcflib": "1.0.14" + }, + "VCFS_NORM": { + "bcftools": 1.21 + }, + "VCFS_NORM_SORT": { + "bcftools": 1.21 + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + 
"multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/freebayes", + "reports/bcftools/freebayes/testN", + "reports/bcftools/freebayes/testN/testN.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/freebayes/testT", + "reports/bcftools/freebayes/testT/testT.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/strelka", + "reports/bcftools/strelka/testN", + "reports/bcftools/strelka/testN/testN.strelka.variants.bcftools_stats.txt", + "reports/bcftools/strelka/testT", + 
"reports/bcftools/strelka/testT/testT.strelka.variants.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/testN", + "reports/mosdepth/testN/testN.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testN/testN.recal.mosdepth.region.dist.txt", + "reports/mosdepth/testN/testN.recal.mosdepth.summary.txt", + "reports/mosdepth/testN/testN.recal.regions.bed.gz", + "reports/mosdepth/testN/testN.recal.regions.bed.gz.csi", + "reports/mosdepth/testT", + "reports/mosdepth/testT/testT.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.region.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.summary.txt", + "reports/mosdepth/testT/testT.recal.regions.bed.gz", + "reports/mosdepth/testT/testT.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/testN", + "reports/samtools/testN/testN.recal.cram.stats", + "reports/samtools/testT", + "reports/samtools/testT/testT.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/freebayes", + "reports/vcftools/freebayes/testN", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.TsTv.qual", + "reports/vcftools/freebayes/testT", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.TsTv.qual", + "reports/vcftools/strelka", + "reports/vcftools/strelka/testN", + "reports/vcftools/strelka/testN/testN.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/testN/testN.strelka.variants.TsTv.count", + "reports/vcftools/strelka/testN/testN.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/testT", + "reports/vcftools/strelka/testT/testT.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/testT/testT.strelka.variants.TsTv.count", + 
"reports/vcftools/strelka/testT/testT.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/concat", + "variant_calling/concat/testN", + "variant_calling/concat/testN/testN.germline.vcf.gz", + "variant_calling/concat/testN/testN.germline.vcf.gz.tbi", + "variant_calling/concat/testT", + "variant_calling/concat/testT/testT.germline.vcf.gz", + "variant_calling/concat/testT/testT.germline.vcf.gz.tbi", + "variant_calling/freebayes", + "variant_calling/freebayes/testN", + "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/testN/testN.freebayes.vcf.gz", + "variant_calling/freebayes/testN/testN.freebayes.vcf.gz.tbi", + "variant_calling/freebayes/testT", + "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/testT/testT.freebayes.vcf.gz", + "variant_calling/freebayes/testT/testT.freebayes.vcf.gz.tbi", + "variant_calling/normalized", + "variant_calling/normalized/testN", + "variant_calling/normalized/testN/testN.freebayes.filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/testN/testN.freebayes.filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/testN/testN.strelka.variants.norm.sorted.vcf.gz", + "variant_calling/normalized/testN/testN.strelka.variants.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/testT", + "variant_calling/normalized/testT/testT.freebayes.filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/testT/testT.freebayes.filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/testT/testT.strelka.variants.norm.sorted.vcf.gz", + "variant_calling/normalized/testT/testT.strelka.variants.norm.sorted.vcf.gz.tbi", + "variant_calling/strelka", + "variant_calling/strelka/testN", + "variant_calling/strelka/testN/testN.strelka.genome.vcf.gz", + 
"variant_calling/strelka/testN/testN.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/testN/testN.strelka.variants.vcf.gz", + "variant_calling/strelka/testN/testN.strelka.variants.vcf.gz.tbi", + "variant_calling/strelka/testT", + "variant_calling/strelka/testT/testT.strelka.genome.vcf.gz", + "variant_calling/strelka/testT/testT.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/testT/testT.strelka.variants.vcf.gz", + "variant_calling/strelka/testT/testT.strelka.variants.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,9cfd38310d4da59c7242b2a7ae6bd651", + "mosdepth-cumcoverage-dist-id.txt:md5,f0177ed551b6ec930854d0d221904ec0", + "mosdepth_perchrom.txt:md5,9cfd38310d4da59c7242b2a7ae6bd651", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,a0c101497a0566f60b9e8388207944ab", + "samtools_alignment_plot.txt:md5,563784066de81f3bc8ccf5fbbe82b3d5", + "testN.freebayes.filtered.bcftools_stats.txt:md5,96c5e6b9e55f1bb67fca7886fb322b67", + "testT.freebayes.filtered.bcftools_stats.txt:md5,ce8b7afaace836b6a00bd8ccdc980500", + "testN.strelka.variants.bcftools_stats.txt:md5,f11c9e5e9820868809b5465970c7cc06", + "testT.strelka.variants.bcftools_stats.txt:md5,ac070b6cf4de7540a227265971244b31", + "testN.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "testN.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "testN.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "testN.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "testN.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "testT.recal.mosdepth.global.dist.txt:md5,3106c114529adc4231badeb3bb38b6d1", + "testT.recal.mosdepth.region.dist.txt:md5,ccf646922b05cb4759c4f89072be2b69", + "testT.recal.mosdepth.summary.txt:md5,024649a659caff330dfbef4ac3560542", + "testT.recal.regions.bed.gz:md5,14b36a2cf428840aab471f95cfbe399f", + 
"testT.recal.regions.bed.gz.csi:md5,0dc011f3344841dc14aa488da905e917", + "testN.freebayes.filtered.FILTER.summary:md5,e2f8e86fb18631c0bf6ccd7d9d4039c8", + "testN.freebayes.filtered.TsTv.count:md5,89562fef808b5c3db629682d36fd86fc", + "testT.freebayes.filtered.FILTER.summary:md5,4b25443c427fd27761017e4f3a556b29", + "testT.freebayes.filtered.TsTv.count:md5,803d74e40f7716202bae2a3a81c1ddfc", + "testN.strelka.variants.FILTER.summary:md5,39ff2cc8eb7495a14a6b76e0ab627027", + "testN.strelka.variants.TsTv.count:md5,ee7dafc8d941b8502a04a63dc3126fff", + "testT.strelka.variants.FILTER.summary:md5,ad4c8982a91cef84d0c1ff827a0ffcf3", + "testT.strelka.variants.TsTv.count:md5,a32d1781bd32b81ec18c6e0a191c0efe" + ], + "No BAM files", + "No CRAM files", + [ + [ + "testN.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=7, phased=false, phasedAutodetect=false]" + ], + [ + "testT.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=10, phased=false, phasedAutodetect=false]" + ] + ], + [ + "testN.freebayes.filtered.vcf.gz:md5,7a08d55c866e45d635cf378825ddba74", + "testT.freebayes.filtered.vcf.gz:md5,dc1b49ffa2401b2a5193a0527bf8cd0a", + "testN.freebayes.filtered.norm.sorted.vcf.gz:md5,18992a755b06d72374bb072cc8af86f9", + "testN.strelka.variants.norm.sorted.vcf.gz:md5,ba9aabddec39a8bcbdb38c46f7a26515", + "testT.freebayes.filtered.norm.sorted.vcf.gz:md5,574d3d9e73986d07839d5e720c3ea929", + "testT.strelka.variants.norm.sorted.vcf.gz:md5,2dd9f7f6dac9c10afcf01f148a486799", + "testN.strelka.genome.vcf.gz:md5,325ea84ccf1ec1cf7a6c5c4aae1c0d1", + "testN.strelka.variants.vcf.gz:md5,3a3b3e67614ec91f4f0b88999dff454b", + "testT.strelka.genome.vcf.gz:md5,ece653410947a3ace10296c2cc5ff095", + "testT.strelka.variants.vcf.gz:md5,b1cf29a73f6b423cf26ab1a71847c9b" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T10:44:17.917186" + } +} diff --git a/tests/postprocess_consensus.nf.test 
b/tests/postprocess_consensus.nf.test new file mode 100644 index 0000000000..40027f9df4 --- /dev/null +++ b/tests/postprocess_consensus.nf.test @@ -0,0 +1,86 @@ +// Tests consensus calling with CONSENSUS_FROM_SITES module +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + profile "test,mutect" + + def test_scenario = [ + [ + name: "-profile test --snv_consensus_calling --normalize_vcfs --tools mpileup,mutect2,strelka (germline + somatic)", + params: [ + genome: null, + igenomes_ignore: true, + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + germline_resource: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz', + germline_resource_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi', + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + step: "variant_calling", + wes: true, + normalize_vcfs: true, + snv_consensus_calling: true, + tools: 'mpileup,mutect2,strelka' + ], + ignoreFiles: '**/consensus/**/{0000.vcf,0000.vcf.gz,sites.txt}', + include_freebayes_unfiltered: true, + no_vcf_md5sum: true, + no_conda: true + ], + [ + name: "-profile test --filter_vcfs --snv_consensus_calling --normalize_vcfs --tools mpileup,mutect2,strelka (germline + somatic)", + params: [ + genome: null, 
+ igenomes_ignore: true, + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + step: "variant_calling", + wes: true, + normalize_vcfs: true, + snv_consensus_calling: true, + tools: 'mpileup,mutect2,strelka', + filter_vcfs: true, + ], + ignoreFiles: '**/consensus/**/{0000.vcf,0000.vcf.gz,sites.txt}', + include_freebayes_unfiltered: true, + no_conda: true + ], + [ + name: "-profile test --snv_consensus_calling --normalize_vcfs --tools lofreq,mpileup,mutect2 (tumor-only)", + params: [ + genome: null, + igenomes_ignore: true, + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: "${projectDir}/tests/csv/3.0/recalibrated_tumoronly.csv", + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + step: "variant_calling", + wes: true, + normalize_vcfs: true, + snv_consensus_calling: true, + tools: 'lofreq,mpileup,mutect2' + ], + ignoreFiles: '**/consensus/**/{0000.vcf,0000.vcf.gz,sites.txt}', + include_freebayes_unfiltered: true, + no_vcf_md5sum: true, + no_conda: true + ] + 
] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/postprocess_consensus.nf.test.snap b/tests/postprocess_consensus.nf.test.snap new file mode 100644 index 0000000000..cffc4728ae --- /dev/null +++ b/tests/postprocess_consensus.nf.test.snap @@ -0,0 +1,1092 @@ +{ + "-profile test --filter_vcfs --snv_consensus_calling --normalize_vcfs --tools mpileup,mutect2,strelka (germline + somatic)": { + "content": [ + 64, + { + "ADD_INFO_TO_VCF": { + "gawk": "5.3.0" + }, + "BCFTOOLS_CONCAT": { + "bcftools": 1.21 + }, + "BCFTOOLS_ISEC": { + "bcftools": 1.22 + }, + "BCFTOOLS_MPILEUP": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CONSENSUS_FROM_SITES": { + "gawk": "mawk 1.3.4 20240123", + "htslib": "1.22.1" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FILTER_VCFS": { + "bcftools": 1.22 + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "LEARNREADORIENTATIONMODEL": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "MUTECT2_PAIRED": { + "gatk4": "4.6.1.0" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "STRELKA_SOMATIC": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_EXT_VCF": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFS_NORM": { + "bcftools": 1.21 + }, + "VCFS_NORM_SORT": { + "bcftools": 1.21 + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + 
"multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + 
"multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + 
"multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/bcftools", + "reports/bcftools/bcftools/sample3", + "reports/bcftools/bcftools/sample3/sample3.bcftools.bcftools_stats.txt", + "reports/bcftools/mutect2", + "reports/bcftools/mutect2/sample4_vs_sample3", + "reports/bcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.bcftools_stats.txt", + "reports/bcftools/strelka", + "reports/bcftools/strelka/sample3", + "reports/bcftools/strelka/sample3/sample3.strelka.variants.bcftools_stats.txt", + "reports/bcftools/strelka/sample4_vs_sample3", + "reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt", + "reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz.csi", + 
"reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/bcftools", + "reports/vcftools/bcftools/sample3", + "reports/vcftools/bcftools/sample3/sample3.bcftools.FILTER.summary", + "reports/vcftools/bcftools/sample3/sample3.bcftools.TsTv.count", + "reports/vcftools/bcftools/sample3/sample3.bcftools.TsTv.qual", + "reports/vcftools/mutect2", + "reports/vcftools/mutect2/sample4_vs_sample3", + "reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.FILTER.summary", + "reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.TsTv.count", + "reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.TsTv.qual", + "reports/vcftools/strelka", + "reports/vcftools/strelka/sample3", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.count", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/sample4_vs_sample3", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count", + 
"reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual", + "variant_calling", + "variant_calling/bcftools", + "variant_calling/bcftools/sample3", + "variant_calling/bcftools/sample3/sample3.bcftools.vcf.gz", + "variant_calling/bcftools/sample3/sample3.bcftools.vcf.gz.tbi", + "variant_calling/consensus", + "variant_calling/consensus/sample3", + "variant_calling/consensus/sample3/sample3.consensus.vcf.gz", + "variant_calling/consensus/sample3/sample3.consensus.vcf.gz.tbi", + "variant_calling/consensus/sample3/sample3_consensus", + "variant_calling/consensus/sample3/sample3_consensus/0000.vcf.gz", + "variant_calling/consensus/sample3/sample3_consensus/0000.vcf.gz.tbi", + "variant_calling/consensus/sample3/sample3_consensus/0001.vcf.gz", + "variant_calling/consensus/sample3/sample3_consensus/0001.vcf.gz.tbi", + "variant_calling/consensus/sample3/sample3_consensus/README.txt", + "variant_calling/consensus/sample3/sample3_consensus/sites.txt", + "variant_calling/consensus/sample4_vs_sample3", + "variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3.consensus.vcf.gz", + "variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3.consensus.vcf.gz.tbi", + "variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3_consensus", + "variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3_consensus/0000.vcf.gz", + "variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3_consensus/0000.vcf.gz.tbi", + "variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3_consensus/0001.vcf.gz", + "variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3_consensus/0001.vcf.gz.tbi", + 
"variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3_consensus/README.txt", + "variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3_consensus/sites.txt", + "variant_calling/filtered", + "variant_calling/filtered/sample3", + "variant_calling/filtered/sample3/sample3.bcftools.bcftools_filtered.vcf.gz", + "variant_calling/filtered/sample3/sample3.bcftools.bcftools_filtered.vcf.gz.tbi", + "variant_calling/filtered/sample3/sample3.strelka.variants.bcftools_filtered.vcf.gz", + "variant_calling/filtered/sample3/sample3.strelka.variants.bcftools_filtered.vcf.gz.tbi", + "variant_calling/filtered/sample4_vs_sample3", + "variant_calling/filtered/sample4_vs_sample3/sample4_vs_sample3.mutect2.bcftools_filtered.vcf.gz", + "variant_calling/filtered/sample4_vs_sample3/sample4_vs_sample3.mutect2.bcftools_filtered.vcf.gz.tbi", + "variant_calling/filtered/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_filtered.vcf.gz", + "variant_calling/filtered/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_filtered.vcf.gz.tbi", + "variant_calling/filtered/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_filtered.vcf.gz", + "variant_calling/filtered/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_filtered.vcf.gz.tbi", + "variant_calling/mutect2", + "variant_calling/mutect2/sample4_vs_sample3", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.artifactprior.tar.gz", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.vcf.gz", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.vcf.gz.stats", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.vcf.gz.tbi", + "variant_calling/normalized", + "variant_calling/normalized/sample3", + "variant_calling/normalized/sample3/sample3.bcftools.bcftools_filtered.norm.sorted.vcf.gz", + 
"variant_calling/normalized/sample3/sample3.bcftools.bcftools_filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/sample3/sample3.strelka.variants.bcftools_filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/sample3/sample3.strelka.variants.bcftools_filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/sample4_vs_sample3", + "variant_calling/normalized/sample4_vs_sample3/sample4_vs_sample3.mutect2.bcftools_filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/sample4_vs_sample3/sample4_vs_sample3.mutect2.bcftools_filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/strelka", + "variant_calling/strelka/sample3", + "variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz", + "variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz", + "variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz.tbi", + "variant_calling/strelka/sample4_vs_sample3", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz.tbi", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,4ee05f71086179b42a01cd2fb450346f", + 
"mosdepth-cumcoverage-dist-id.txt:md5,cb7468f51b8be1230fb3ac5b130be31f", + "mosdepth_perchrom.txt:md5,4ee05f71086179b42a01cd2fb450346f", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,e0a8d8867064083908a8ca1ea782d7ac", + "samtools_alignment_plot.txt:md5,f4b1a7cef760291172144a8614b4a1cd", + "sample3.bcftools.bcftools_stats.txt:md5,bd86ded3843a217d429b34edd22a9a4e", + "sample4_vs_sample3.mutect2.bcftools_stats.txt:md5,65bc65858d3dfa1d8913119850626823", + "sample3.strelka.variants.bcftools_stats.txt:md5,6d4d032ba146941cb226765aaed9d67f", + "sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt:md5,62c6123f6494c3cdbd42dc7230e757b3", + "sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt:md5,8404ea88658fbc41d447ba20bf46dd0a", + "sample3.recal.mosdepth.global.dist.txt:md5,69e29702ef01fd8f6c7a5468fc35a16a", + "sample3.recal.mosdepth.region.dist.txt:md5,6ec49cd7d510c2eb3d9d90fdb79b783a", + "sample3.recal.mosdepth.summary.txt:md5,103098d0bf76ed82d2b87d5f242b099a", + "sample3.recal.per-base.bed.gz:md5,297f96648928d0ca5184223fb9941e7c", + "sample3.recal.per-base.bed.gz.csi:md5,c67dcd711b096eb42f43784d5eadbc0d", + "sample3.recal.regions.bed.gz:md5,314ce8d7273eff353072108aa77c327c", + "sample3.recal.regions.bed.gz.csi:md5,9cb0ad7039a3b703d16ca7d5b835c0ee", + "sample4.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample4.recal.mosdepth.region.dist.txt:md5,39005ffaac22871ffaaf19656fe69c5b", + "sample4.recal.mosdepth.summary.txt:md5,68d4b98f17361fddf73052ead34fa370", + "sample4.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample4.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample4.recal.regions.bed.gz:md5,b7561bc56a955f7db0f11e67e2ec0386", + "sample4.recal.regions.bed.gz.csi:md5,393c2749068304d8545b501b9d4658e4", + "sample3.bcftools.FILTER.summary:md5,9b62595b026decf12e9198d531e4307a", + 
"sample3.bcftools.TsTv.count:md5,6c937125d7bac4c491bea50f18cba43a", + "sample4_vs_sample3.mutect2.FILTER.summary:md5,cac64448be577632a614af62a23af34a", + "sample4_vs_sample3.mutect2.TsTv.count:md5,3739f24da2d2019cc4bc2821e30658eb", + "sample3.strelka.variants.FILTER.summary:md5,2048a5de0201a6052c988a0189979a5f", + "sample3.strelka.variants.TsTv.count:md5,c5b7a8eda2526d899098439ae4c06a49", + "sample4_vs_sample3.strelka.somatic_indels.FILTER.summary:md5,3441628cd6550ed459ca1c3db989ceea", + "sample4_vs_sample3.strelka.somatic_indels.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f", + "sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary:md5,4fc17fa5625b4d1dcc5d791b1eb22d85", + "sample4_vs_sample3.strelka.somatic_snvs.TsTv.count:md5,fc7af1f534890c4ad3025588b3af62ae", + "sample4_vs_sample3.mutect2.vcf.gz.stats:md5,bd657dd9abf6e2354224bb0d20ba181e" + ], + "No BAM files", + "No CRAM files", + "No Freebayes unfiltered VCF files", + [ + "sample3.bcftools.vcf.gz:md5,c55cf36bc05b6a7b4a98bb3ba925fc0a", + "sample3.consensus.vcf.gz:md5,eb2d34b0469dce34bb82b6f1c228ede5", + "0001.vcf.gz:md5,87e67c661c05d0355873776659ce7f64", + "sample4_vs_sample3.consensus.vcf.gz:md5,32fd4315aa4012d8c066927ab0313a22", + "0001.vcf.gz:md5,e5e12b0d83fdea7e2013b63241eb456a", + "sample3.bcftools.bcftools_filtered.vcf.gz:md5,c55cf36bc05b6a7b4a98bb3ba925fc0a", + "sample3.strelka.variants.bcftools_filtered.vcf.gz:md5,bdd5d76d3f2cc6a873ba829b91155562", + "sample4_vs_sample3.mutect2.bcftools_filtered.vcf.gz:md5,6714999f8c31fa73d82272d99b34a3a1", + "sample4_vs_sample3.strelka.somatic_indels.bcftools_filtered.vcf.gz:md5,a7861b539e2cfd4429a02757bc9845e9", + "sample4_vs_sample3.strelka.somatic_snvs.bcftools_filtered.vcf.gz:md5,973d0ca572b2c94e18279a3c7694d934", + "sample4_vs_sample3.mutect2.vcf.gz:md5,f2c46d0dae1b1a59180c0b9e595993d2", + "sample3.bcftools.bcftools_filtered.norm.sorted.vcf.gz:md5,ff4832fed7bffc1b53254f635ddb8a95", + 
"sample3.strelka.variants.bcftools_filtered.norm.sorted.vcf.gz:md5,e3f3390321c8693afd777202e15e9b5e", + "sample4_vs_sample3.mutect2.bcftools_filtered.norm.sorted.vcf.gz:md5,8826a34d4c7a004750b8a56fee56dcbb", + "sample4_vs_sample3.strelka.somatic_indels.bcftools_filtered.norm.sorted.vcf.gz:md5,e60a2a65edfea45253619ccd5c6054f0", + "sample4_vs_sample3.strelka.somatic_snvs.bcftools_filtered.norm.sorted.vcf.gz:md5,4aaf8e3ac50147ec1164421c3702adb9", + "sample3.strelka.genome.vcf.gz:md5,22cd13eb3ab8e94c58d1dc311b81a793", + "sample3.strelka.variants.vcf.gz:md5,5bc0beaaa04d75a72193339b0494ed5f", + "sample4_vs_sample3.strelka.somatic_indels.vcf.gz:md5,a500521b4aa64e8f247d57fcb3195b39", + "sample4_vs_sample3.strelka.somatic_snvs.vcf.gz:md5,17d4e21e50940b4e75cca144af09ce66" + ], + [ + "WARN: If Mutect2 is specified without a germline resource, no filtering will be done.", + "WARN: No Panel-of-normal was specified for Mutect2." + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-06T14:58:25.361891009" + }, + "-profile test --snv_consensus_calling --normalize_vcfs --tools lofreq,mpileup,mutect2 (tumor-only)": { + "content": [ + 36, + { + "ADD_INFO_TO_VCF": { + "gawk": "5.3.0" + }, + "BCFTOOLS_ISEC": { + "bcftools": 1.22 + }, + "BCFTOOLS_MPILEUP": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CONSENSUS_FROM_SITES": { + "gawk": "mawk 1.3.4 20240123", + "htslib": "1.22.1" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "LEARNREADORIENTATIONMODEL": { + "gatk4": "4.6.1.0" + }, + "LOFREQ": { + "lofreq": "2.1.5" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "MUTECT2": { + "gatk4": "4.6.1.0" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_EXT_VCF": { + "bgzip": 
"1.21", + "tabix": "1.21" + }, + "VCFS_NORM": { + "bcftools": 1.21 + }, + "VCFS_NORM_SORT": { + "bcftools": 1.21 + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + 
"multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/bcftools", + "reports/bcftools/bcftools/sample2", + "reports/bcftools/bcftools/sample2/sample2.bcftools.bcftools_stats.txt", + "reports/bcftools/lofreq", + "reports/bcftools/lofreq/sample2", + "reports/bcftools/lofreq/sample2/sample2.lofreq.bcftools_stats.txt", + "reports/bcftools/mutect2", + "reports/bcftools/mutect2/sample2", + "reports/bcftools/mutect2/sample2/sample2.mutect2.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", 
+ "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/bcftools", + "reports/vcftools/bcftools/sample2", + "reports/vcftools/bcftools/sample2/sample2.bcftools.FILTER.summary", + "reports/vcftools/bcftools/sample2/sample2.bcftools.TsTv.count", + "reports/vcftools/bcftools/sample2/sample2.bcftools.TsTv.qual", + "reports/vcftools/lofreq", + "reports/vcftools/lofreq/sample2", + "reports/vcftools/lofreq/sample2/sample2.lofreq.FILTER.summary", + "reports/vcftools/lofreq/sample2/sample2.lofreq.TsTv.qual", + "reports/vcftools/mutect2", + "reports/vcftools/mutect2/sample2", + "reports/vcftools/mutect2/sample2/sample2.mutect2.FILTER.summary", + "reports/vcftools/mutect2/sample2/sample2.mutect2.TsTv.count", + "reports/vcftools/mutect2/sample2/sample2.mutect2.TsTv.qual", + "variant_calling", + "variant_calling/bcftools", + "variant_calling/bcftools/sample2", + "variant_calling/bcftools/sample2/sample2.bcftools.vcf.gz", + "variant_calling/bcftools/sample2/sample2.bcftools.vcf.gz.tbi", + "variant_calling/consensus", + "variant_calling/consensus/sample2", + "variant_calling/consensus/sample2/sample2.consensus.vcf.gz", + "variant_calling/consensus/sample2/sample2.consensus.vcf.gz.tbi", + "variant_calling/consensus/sample2/sample2_consensus", + "variant_calling/consensus/sample2/sample2_consensus/0000.vcf.gz", + "variant_calling/consensus/sample2/sample2_consensus/0000.vcf.gz.tbi", + "variant_calling/consensus/sample2/sample2_consensus/0001.vcf.gz", + "variant_calling/consensus/sample2/sample2_consensus/0001.vcf.gz.tbi", + "variant_calling/consensus/sample2/sample2_consensus/0002.vcf.gz", + 
"variant_calling/consensus/sample2/sample2_consensus/0002.vcf.gz.tbi", + "variant_calling/consensus/sample2/sample2_consensus/README.txt", + "variant_calling/consensus/sample2/sample2_consensus/sites.txt", + "variant_calling/lofreq", + "variant_calling/lofreq/sample2", + "variant_calling/lofreq/sample2/sample2.lofreq.vcf.gz", + "variant_calling/lofreq/sample2/sample2.lofreq.vcf.gz.tbi", + "variant_calling/mutect2", + "variant_calling/mutect2/sample2", + "variant_calling/mutect2/sample2/sample2.mutect2.artifactprior.tar.gz", + "variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz", + "variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz.stats", + "variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz.tbi", + "variant_calling/normalized", + "variant_calling/normalized/sample2", + "variant_calling/normalized/sample2/sample2.bcftools.norm.sorted.vcf.gz", + "variant_calling/normalized/sample2/sample2.bcftools.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/sample2/sample2.lofreq.norm.sorted.vcf.gz", + "variant_calling/normalized/sample2/sample2.lofreq.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/sample2/sample2.mutect2.norm.sorted.vcf.gz", + "variant_calling/normalized/sample2/sample2.mutect2.norm.sorted.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,1f24f2f40467234c410a8bda544a8aae", + "mosdepth-cumcoverage-dist-id.txt:md5,1036ea76acae803f591fd99838a8eded", + "mosdepth_perchrom.txt:md5,1f24f2f40467234c410a8bda544a8aae", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", + "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", + "sample2.bcftools.bcftools_stats.txt:md5,3299f97352e32c873c95e43922c79147", + "sample2.lofreq.bcftools_stats.txt:md5,a8a850fdd11644fa4b770971dfe37194", + "sample2.mutect2.bcftools_stats.txt:md5,846b97f3763c5316f2b7e76962d80be9", + "sample2.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + 
"sample2.recal.mosdepth.region.dist.txt:md5,39005ffaac22871ffaaf19656fe69c5b", + "sample2.recal.mosdepth.summary.txt:md5,68d4b98f17361fddf73052ead34fa370", + "sample2.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample2.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample2.recal.regions.bed.gz:md5,b7561bc56a955f7db0f11e67e2ec0386", + "sample2.recal.regions.bed.gz.csi:md5,393c2749068304d8545b501b9d4658e4", + "sample2.bcftools.FILTER.summary:md5,8766995f3e4119ef30dfdaa9fb3752ce", + "sample2.bcftools.TsTv.count:md5,01df95fcb4df593f7e1b214d90ebdb59", + "sample2.lofreq.FILTER.summary:md5,8dd8a0c91d5c4a260b462e04f615e502", + "sample2.mutect2.FILTER.summary:md5,5ca4eee02bc653debd0b37883cdf13c3", + "sample2.mutect2.TsTv.count:md5,09032284e4ad37fd933302235f857f7d", + "sample2.mutect2.vcf.gz.stats:md5,38e93a2641469e6401a1e5727d9ab0d2" + ], + "No BAM files", + "No CRAM files", + "No Freebayes unfiltered VCF files", + [ + [ + "sample2.bcftools.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=132, phased=false, phasedAutodetect=false]" + ], + [ + "sample2.consensus.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=0, variantCount=71, phased=true, phasedAutodetect=true]" + ], + [ + "0001.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=0, variantCount=67, phased=true, phasedAutodetect=true]" + ], + [ + "0002.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=55, phased=false, phasedAutodetect=false]" + ], + [ + "sample2.lofreq.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=0, variantCount=69, phased=true, phasedAutodetect=true]" + ], + [ + "sample2.mutect2.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=1261, phased=false, phasedAutodetect=false]" + ], + [ + "sample2.bcftools.norm.sorted.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=132, phased=false, phasedAutodetect=false]" + ], + [ + "sample2.lofreq.norm.sorted.vcf.gz", + "VcfFile 
[chromosomes=[chr21], sampleCount=0, variantCount=69, phased=true, phasedAutodetect=true]" + ], + [ + "sample2.mutect2.norm.sorted.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=1261, phased=false, phasedAutodetect=false]" + ] + ], + [ + "WARN: If Mutect2 is specified without a germline resource, no filtering will be done.", + "WARN: No Panel-of-normal was specified for Mutect2." + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-06T15:02:50.699837449" + }, + "-profile test --snv_consensus_calling --normalize_vcfs --tools mpileup,mutect2,strelka (germline + somatic)": { + "content": [ + 63, + { + "ADD_INFO_TO_VCF": { + "gawk": "5.3.0" + }, + "BCFTOOLS_CONCAT": { + "bcftools": 1.21 + }, + "BCFTOOLS_ISEC": { + "bcftools": 1.22 + }, + "BCFTOOLS_MPILEUP": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CALCULATECONTAMINATION": { + "gatk4": "4.6.1.0" + }, + "CONSENSUS_FROM_SITES": { + "gawk": "mawk 1.3.4 20240123", + "htslib": "1.22.1" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FILTERMUTECTCALLS": { + "gatk4": "4.6.1.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "GETPILEUPSUMMARIES_NORMAL": { + "gatk4": "4.6.1.0" + }, + "GETPILEUPSUMMARIES_TUMOR": { + "gatk4": "4.6.1.0" + }, + "LEARNREADORIENTATIONMODEL": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "MUTECT2_PAIRED": { + "gatk4": "4.6.1.0" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "STRELKA_SOMATIC": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_EXT_VCF": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFS_NORM": { + "bcftools": 1.21 + }, + "VCFS_NORM_SORT": { + "bcftools": 1.21 + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + 
[ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + 
"multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/bcftools", + "reports/bcftools/bcftools/sample3", + "reports/bcftools/bcftools/sample3/sample3.bcftools.bcftools_stats.txt", + "reports/bcftools/mutect2", + "reports/bcftools/mutect2/sample4_vs_sample3", + "reports/bcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.bcftools_stats.txt", + "reports/bcftools/strelka", + "reports/bcftools/strelka/sample3", + "reports/bcftools/strelka/sample3/sample3.strelka.variants.bcftools_stats.txt", + "reports/bcftools/strelka/sample4_vs_sample3", + "reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt", + "reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample3", + 
"reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/bcftools", + "reports/vcftools/bcftools/sample3", + "reports/vcftools/bcftools/sample3/sample3.bcftools.FILTER.summary", + "reports/vcftools/bcftools/sample3/sample3.bcftools.TsTv.count", + "reports/vcftools/bcftools/sample3/sample3.bcftools.TsTv.qual", + "reports/vcftools/mutect2", + "reports/vcftools/mutect2/sample4_vs_sample3", + "reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.FILTER.summary", + "reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.TsTv.count", + "reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.TsTv.qual", + "reports/vcftools/strelka", + "reports/vcftools/strelka/sample3", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.FILTER.summary", + 
"reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.count", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/sample4_vs_sample3", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual", + "variant_calling", + "variant_calling/bcftools", + "variant_calling/bcftools/sample3", + "variant_calling/bcftools/sample3/sample3.bcftools.vcf.gz", + "variant_calling/bcftools/sample3/sample3.bcftools.vcf.gz.tbi", + "variant_calling/consensus", + "variant_calling/consensus/sample3", + "variant_calling/consensus/sample3/sample3.consensus.vcf.gz", + "variant_calling/consensus/sample3/sample3.consensus.vcf.gz.tbi", + "variant_calling/consensus/sample3/sample3_consensus", + "variant_calling/consensus/sample3/sample3_consensus/0000.vcf.gz", + "variant_calling/consensus/sample3/sample3_consensus/0000.vcf.gz.tbi", + "variant_calling/consensus/sample3/sample3_consensus/0001.vcf.gz", + "variant_calling/consensus/sample3/sample3_consensus/0001.vcf.gz.tbi", + "variant_calling/consensus/sample3/sample3_consensus/README.txt", + "variant_calling/consensus/sample3/sample3_consensus/sites.txt", + "variant_calling/consensus/sample4_vs_sample3", + "variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3.consensus.vcf.gz", + "variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3.consensus.vcf.gz.tbi", + 
"variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3_consensus", + "variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3_consensus/0000.vcf.gz", + "variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3_consensus/0000.vcf.gz.tbi", + "variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3_consensus/0001.vcf.gz", + "variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3_consensus/0001.vcf.gz.tbi", + "variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3_consensus/README.txt", + "variant_calling/consensus/sample4_vs_sample3/sample4_vs_sample3_consensus/sites.txt", + "variant_calling/mutect2", + "variant_calling/mutect2/sample3", + "variant_calling/mutect2/sample3/sample3.mutect2.pileups.table", + "variant_calling/mutect2/sample4", + "variant_calling/mutect2/sample4/sample4.mutect2.pileups.table", + "variant_calling/mutect2/sample4_vs_sample3", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.artifactprior.tar.gz", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.contamination.table", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.vcf.gz", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.vcf.gz.filteringStats.tsv", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.vcf.gz.tbi", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.segmentation.table", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.vcf.gz", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.vcf.gz.stats", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.vcf.gz.tbi", + "variant_calling/normalized", + "variant_calling/normalized/sample3", + "variant_calling/normalized/sample3/sample3.bcftools.norm.sorted.vcf.gz", + "variant_calling/normalized/sample3/sample3.bcftools.norm.sorted.vcf.gz.tbi", + 
"variant_calling/normalized/sample3/sample3.strelka.variants.norm.sorted.vcf.gz", + "variant_calling/normalized/sample3/sample3.strelka.variants.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/sample4_vs_sample3", + "variant_calling/normalized/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.norm.sorted.vcf.gz", + "variant_calling/normalized/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.norm.sorted.vcf.gz", + "variant_calling/normalized/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.norm.sorted.vcf.gz.tbi", + "variant_calling/strelka", + "variant_calling/strelka/sample3", + "variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz", + "variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz", + "variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz.tbi", + "variant_calling/strelka/sample4_vs_sample3", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz.tbi", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,4ee05f71086179b42a01cd2fb450346f", + "mosdepth-cumcoverage-dist-id.txt:md5,cb7468f51b8be1230fb3ac5b130be31f", + "mosdepth_perchrom.txt:md5,4ee05f71086179b42a01cd2fb450346f", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + 
"samtools-stats-dp.txt:md5,e0a8d8867064083908a8ca1ea782d7ac", + "samtools_alignment_plot.txt:md5,f4b1a7cef760291172144a8614b4a1cd", + "sample3.bcftools.bcftools_stats.txt:md5,bd86ded3843a217d429b34edd22a9a4e", + "sample4_vs_sample3.mutect2.filtered.bcftools_stats.txt:md5,91e802a68f9d8da14bb3dcf784f0810d", + "sample3.strelka.variants.bcftools_stats.txt:md5,6d4d032ba146941cb226765aaed9d67f", + "sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt:md5,62c6123f6494c3cdbd42dc7230e757b3", + "sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt:md5,8404ea88658fbc41d447ba20bf46dd0a", + "sample3.recal.mosdepth.global.dist.txt:md5,69e29702ef01fd8f6c7a5468fc35a16a", + "sample3.recal.mosdepth.region.dist.txt:md5,6ec49cd7d510c2eb3d9d90fdb79b783a", + "sample3.recal.mosdepth.summary.txt:md5,103098d0bf76ed82d2b87d5f242b099a", + "sample3.recal.per-base.bed.gz:md5,297f96648928d0ca5184223fb9941e7c", + "sample3.recal.per-base.bed.gz.csi:md5,c67dcd711b096eb42f43784d5eadbc0d", + "sample3.recal.regions.bed.gz:md5,314ce8d7273eff353072108aa77c327c", + "sample3.recal.regions.bed.gz.csi:md5,9cb0ad7039a3b703d16ca7d5b835c0ee", + "sample4.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample4.recal.mosdepth.region.dist.txt:md5,39005ffaac22871ffaaf19656fe69c5b", + "sample4.recal.mosdepth.summary.txt:md5,68d4b98f17361fddf73052ead34fa370", + "sample4.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample4.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample4.recal.regions.bed.gz:md5,b7561bc56a955f7db0f11e67e2ec0386", + "sample4.recal.regions.bed.gz.csi:md5,393c2749068304d8545b501b9d4658e4", + "sample3.bcftools.FILTER.summary:md5,9b62595b026decf12e9198d531e4307a", + "sample3.bcftools.TsTv.count:md5,6c937125d7bac4c491bea50f18cba43a", + "sample4_vs_sample3.mutect2.filtered.FILTER.summary:md5,b25d4d2a64f9590d0ffb119fd3adb06e", + "sample4_vs_sample3.mutect2.filtered.TsTv.count:md5,3739f24da2d2019cc4bc2821e30658eb", + 
"sample3.strelka.variants.FILTER.summary:md5,2048a5de0201a6052c988a0189979a5f", + "sample3.strelka.variants.TsTv.count:md5,c5b7a8eda2526d899098439ae4c06a49", + "sample4_vs_sample3.strelka.somatic_indels.FILTER.summary:md5,3441628cd6550ed459ca1c3db989ceea", + "sample4_vs_sample3.strelka.somatic_indels.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f", + "sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary:md5,4fc17fa5625b4d1dcc5d791b1eb22d85", + "sample4_vs_sample3.strelka.somatic_snvs.TsTv.count:md5,fc7af1f534890c4ad3025588b3af62ae", + "sample3.mutect2.pileups.table:md5,29388a37ebae6c6c5f868bdb7b341d26", + "sample4.mutect2.pileups.table:md5,df85ceff89be6f9a13707d9cda29dd6e", + "sample4_vs_sample3.mutect2.contamination.table:md5,46c708c943b453da89a3da08acfdb2a7", + "sample4_vs_sample3.mutect2.filtered.vcf.gz.filteringStats.tsv:md5,9ae27fbd04af1a2ea574e2ff1c3a683b", + "sample4_vs_sample3.mutect2.segmentation.table:md5,f4643d9319bde4efbfbe516d6fb13052", + "sample4_vs_sample3.mutect2.vcf.gz.stats:md5,bd657dd9abf6e2354224bb0d20ba181e" + ], + "No BAM files", + "No CRAM files", + "No Freebayes unfiltered VCF files", + [ + [ + "sample3.bcftools.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=123, phased=false, phasedAutodetect=false]" + ], + [ + "sample3.consensus.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=0, variantCount=70, phased=true, phasedAutodetect=true]" + ], + [ + "0001.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=70, phased=false, phasedAutodetect=false]" + ], + [ + "sample4_vs_sample3.consensus.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=0, variantCount=10, phased=true, phasedAutodetect=true]" + ], + [ + "0001.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=2, variantCount=10, phased=true, phasedAutodetect=true]" + ], + [ + "sample4_vs_sample3.mutect2.filtered.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=2, variantCount=40, phased=false, phasedAutodetect=false]" + ], + [ + 
"sample4_vs_sample3.mutect2.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=2, variantCount=40, phased=false, phasedAutodetect=false]" + ], + [ + "sample3.bcftools.norm.sorted.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=123, phased=false, phasedAutodetect=false]" + ], + [ + "sample3.strelka.variants.norm.sorted.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=52656, phased=false, phasedAutodetect=false]" + ], + [ + "sample4_vs_sample3.mutect2.filtered.norm.sorted.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=2, variantCount=40, phased=false, phasedAutodetect=false]" + ], + [ + "sample4_vs_sample3.strelka.somatic_indels.norm.sorted.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=2, variantCount=5, phased=true, phasedAutodetect=false]" + ], + [ + "sample4_vs_sample3.strelka.somatic_snvs.norm.sorted.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=2, variantCount=1602, phased=true, phasedAutodetect=true]" + ], + [ + "sample3.strelka.genome.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=898748, phased=false, phasedAutodetect=false]" + ], + [ + "sample3.strelka.variants.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=52656, phased=false, phasedAutodetect=false]" + ], + [ + "sample4_vs_sample3.strelka.somatic_indels.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=2, variantCount=5, phased=true, phasedAutodetect=false]" + ], + [ + "sample4_vs_sample3.strelka.somatic_snvs.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=2, variantCount=1602, phased=true, phasedAutodetect=true]" + ] + ], + [ + "WARN: No Panel-of-normal was specified for Mutect2." 
+ ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-06T14:53:33.998324217" + } +} diff --git a/tests/postprocess_filtering.nf.test b/tests/postprocess_filtering.nf.test new file mode 100644 index 0000000000..5693e12a2b --- /dev/null +++ b/tests/postprocess_filtering.nf.test @@ -0,0 +1,27 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --filter_vcfs --tools freebayes,strelka", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_joint_bam.csv", + step: 'variant_calling', + filter_vcfs: true, + tools: 'freebayes,strelka' + ], + include_freebayes_unfiltered: true, + ignoreFiles: '**/*.{freebayes,freebayes.filtered.bcftools_filtered*}.vcf{,.gz}', + no_conda: true + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/postprocess_filtering.nf.test.snap b/tests/postprocess_filtering.nf.test.snap new file mode 100644 index 0000000000..c55c223cee --- /dev/null +++ b/tests/postprocess_filtering.nf.test.snap @@ -0,0 +1,288 @@ +{ + "-profile test --filter_vcfs --tools freebayes,strelka": { + "content": [ + 41, + { + "BCFTOOLS_SORT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FILTER_VCFS": { + "bcftools": 1.22 + }, + "FREEBAYES": { + "freebayes": "1.3.10" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_VC_FREEBAYES": { + "tabix": 1.21 + }, + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, + 
"VCFLIB_VCFFILTER": { + "vcflib": "1.0.14" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + 
"multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/freebayes", + "reports/bcftools/freebayes/testN", + "reports/bcftools/freebayes/testN/testN.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/freebayes/testT", + "reports/bcftools/freebayes/testT/testT.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/strelka", + "reports/bcftools/strelka/testN", + "reports/bcftools/strelka/testN/testN.strelka.variants.bcftools_stats.txt", + "reports/bcftools/strelka/testT", + "reports/bcftools/strelka/testT/testT.strelka.variants.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/testN", + "reports/mosdepth/testN/testN.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testN/testN.recal.mosdepth.region.dist.txt", + 
"reports/mosdepth/testN/testN.recal.mosdepth.summary.txt", + "reports/mosdepth/testN/testN.recal.regions.bed.gz", + "reports/mosdepth/testN/testN.recal.regions.bed.gz.csi", + "reports/mosdepth/testT", + "reports/mosdepth/testT/testT.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.region.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.summary.txt", + "reports/mosdepth/testT/testT.recal.regions.bed.gz", + "reports/mosdepth/testT/testT.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/testN", + "reports/samtools/testN/testN.recal.cram.stats", + "reports/samtools/testT", + "reports/samtools/testT/testT.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/freebayes", + "reports/vcftools/freebayes/testN", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.TsTv.qual", + "reports/vcftools/freebayes/testT", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.TsTv.qual", + "reports/vcftools/strelka", + "reports/vcftools/strelka/testN", + "reports/vcftools/strelka/testN/testN.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/testN/testN.strelka.variants.TsTv.count", + "reports/vcftools/strelka/testN/testN.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/testT", + "reports/vcftools/strelka/testT/testT.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/testT/testT.strelka.variants.TsTv.count", + "reports/vcftools/strelka/testT/testT.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/filtered", + "variant_calling/filtered/testN", + "variant_calling/filtered/testN/testN.freebayes.filtered.bcftools_filtered.vcf.gz", + 
"variant_calling/filtered/testN/testN.freebayes.filtered.bcftools_filtered.vcf.gz.tbi", + "variant_calling/filtered/testN/testN.strelka.variants.bcftools_filtered.vcf.gz", + "variant_calling/filtered/testN/testN.strelka.variants.bcftools_filtered.vcf.gz.tbi", + "variant_calling/filtered/testT", + "variant_calling/filtered/testT/testT.freebayes.filtered.bcftools_filtered.vcf.gz", + "variant_calling/filtered/testT/testT.freebayes.filtered.bcftools_filtered.vcf.gz.tbi", + "variant_calling/filtered/testT/testT.strelka.variants.bcftools_filtered.vcf.gz", + "variant_calling/filtered/testT/testT.strelka.variants.bcftools_filtered.vcf.gz.tbi", + "variant_calling/freebayes", + "variant_calling/freebayes/testN", + "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/testN/testN.freebayes.vcf.gz", + "variant_calling/freebayes/testN/testN.freebayes.vcf.gz.tbi", + "variant_calling/freebayes/testT", + "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/testT/testT.freebayes.vcf.gz", + "variant_calling/freebayes/testT/testT.freebayes.vcf.gz.tbi", + "variant_calling/strelka", + "variant_calling/strelka/testN", + "variant_calling/strelka/testN/testN.strelka.genome.vcf.gz", + "variant_calling/strelka/testN/testN.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/testN/testN.strelka.variants.vcf.gz", + "variant_calling/strelka/testN/testN.strelka.variants.vcf.gz.tbi", + "variant_calling/strelka/testT", + "variant_calling/strelka/testT/testT.strelka.genome.vcf.gz", + "variant_calling/strelka/testT/testT.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/testT/testT.strelka.variants.vcf.gz", + "variant_calling/strelka/testT/testT.strelka.variants.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,9cfd38310d4da59c7242b2a7ae6bd651", + 
"mosdepth-cumcoverage-dist-id.txt:md5,f0177ed551b6ec930854d0d221904ec0", + "mosdepth_perchrom.txt:md5,9cfd38310d4da59c7242b2a7ae6bd651", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,a0c101497a0566f60b9e8388207944ab", + "samtools_alignment_plot.txt:md5,563784066de81f3bc8ccf5fbbe82b3d5", + "testN.freebayes.filtered.bcftools_stats.txt:md5,96c5e6b9e55f1bb67fca7886fb322b67", + "testT.freebayes.filtered.bcftools_stats.txt:md5,ce8b7afaace836b6a00bd8ccdc980500", + "testN.strelka.variants.bcftools_stats.txt:md5,f11c9e5e9820868809b5465970c7cc06", + "testT.strelka.variants.bcftools_stats.txt:md5,ac070b6cf4de7540a227265971244b31", + "testN.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "testN.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "testN.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "testN.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "testN.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "testT.recal.mosdepth.global.dist.txt:md5,3106c114529adc4231badeb3bb38b6d1", + "testT.recal.mosdepth.region.dist.txt:md5,ccf646922b05cb4759c4f89072be2b69", + "testT.recal.mosdepth.summary.txt:md5,024649a659caff330dfbef4ac3560542", + "testT.recal.regions.bed.gz:md5,14b36a2cf428840aab471f95cfbe399f", + "testT.recal.regions.bed.gz.csi:md5,0dc011f3344841dc14aa488da905e917", + "testN.freebayes.filtered.FILTER.summary:md5,e2f8e86fb18631c0bf6ccd7d9d4039c8", + "testN.freebayes.filtered.TsTv.count:md5,89562fef808b5c3db629682d36fd86fc", + "testT.freebayes.filtered.FILTER.summary:md5,4b25443c427fd27761017e4f3a556b29", + "testT.freebayes.filtered.TsTv.count:md5,803d74e40f7716202bae2a3a81c1ddfc", + "testN.strelka.variants.FILTER.summary:md5,39ff2cc8eb7495a14a6b76e0ab627027", + "testN.strelka.variants.TsTv.count:md5,ee7dafc8d941b8502a04a63dc3126fff", + "testT.strelka.variants.FILTER.summary:md5,ad4c8982a91cef84d0c1ff827a0ffcf3", + 
"testT.strelka.variants.TsTv.count:md5,a32d1781bd32b81ec18c6e0a191c0efe" + ], + "No BAM files", + "No CRAM files", + [ + [ + "testN.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=7, phased=false, phasedAutodetect=false]" + ], + [ + "testT.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=10, phased=false, phasedAutodetect=false]" + ] + ], + [ + "testN.strelka.variants.bcftools_filtered.vcf.gz:md5,35a541d45222013a9887bbe1678f9444", + "testT.strelka.variants.bcftools_filtered.vcf.gz:md5,46757c1dd7a5c1f62b39d91281016521", + "testN.freebayes.filtered.vcf.gz:md5,7a08d55c866e45d635cf378825ddba74", + "testT.freebayes.filtered.vcf.gz:md5,dc1b49ffa2401b2a5193a0527bf8cd0a", + "testN.strelka.genome.vcf.gz:md5,325ea84ccf1ec1cf7a6c5c4aae1c0d1", + "testN.strelka.variants.vcf.gz:md5,3a3b3e67614ec91f4f0b88999dff454b", + "testT.strelka.genome.vcf.gz:md5,ece653410947a3ace10296c2cc5ff095", + "testT.strelka.variants.vcf.gz:md5,b1cf29a73f6b423cf26ab1a71847c9b" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T11:32:58.31191" + } +} diff --git a/tests/postprocess_normalization.nf.test b/tests/postprocess_normalization.nf.test new file mode 100644 index 0000000000..a09e13d9b3 --- /dev/null +++ b/tests/postprocess_normalization.nf.test @@ -0,0 +1,27 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --normalize_vcfs --tools freebayes,strelka", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_joint_bam.csv", + step: 'variant_calling', + normalize_vcfs: true, + tools: 'freebayes,strelka' + ], + ignoreFiles: '**/*.{freebayes,freebayes.filtered.bcftools_filtered*}.vcf{,.gz}', + include_freebayes_unfiltered: true, + no_conda: true + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, 
UTILS.getTest(scenario)) + } +} diff --git a/tests/postprocess_normalization.nf.test.snap b/tests/postprocess_normalization.nf.test.snap new file mode 100644 index 0000000000..c76009c1ca --- /dev/null +++ b/tests/postprocess_normalization.nf.test.snap @@ -0,0 +1,300 @@ +{ + "-profile test --normalize_vcfs --tools freebayes,strelka": { + "content": [ + 53, + { + "ADD_INFO_TO_VCF": { + "gawk": "5.3.0" + }, + "BCFTOOLS_SORT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FREEBAYES": { + "freebayes": "1.3.10" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_EXT_VCF": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_VC_FREEBAYES": { + "tabix": 1.21 + }, + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, + "VCFLIB_VCFFILTER": { + "vcflib": "1.0.14" + }, + "VCFS_NORM": { + "bcftools": 1.21 + }, + "VCFS_NORM_SORT": { + "bcftools": 1.21 + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + 
"multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + 
"multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + 
"multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/freebayes", + "reports/bcftools/freebayes/testN", + "reports/bcftools/freebayes/testN/testN.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/freebayes/testT", + "reports/bcftools/freebayes/testT/testT.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/strelka", + "reports/bcftools/strelka/testN", + "reports/bcftools/strelka/testN/testN.strelka.variants.bcftools_stats.txt", + "reports/bcftools/strelka/testT", + "reports/bcftools/strelka/testT/testT.strelka.variants.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/testN", + "reports/mosdepth/testN/testN.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testN/testN.recal.mosdepth.region.dist.txt", + "reports/mosdepth/testN/testN.recal.mosdepth.summary.txt", + "reports/mosdepth/testN/testN.recal.regions.bed.gz", + "reports/mosdepth/testN/testN.recal.regions.bed.gz.csi", + "reports/mosdepth/testT", + "reports/mosdepth/testT/testT.recal.mosdepth.global.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.region.dist.txt", + "reports/mosdepth/testT/testT.recal.mosdepth.summary.txt", + "reports/mosdepth/testT/testT.recal.regions.bed.gz", + "reports/mosdepth/testT/testT.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/testN", + "reports/samtools/testN/testN.recal.cram.stats", + "reports/samtools/testT", + "reports/samtools/testT/testT.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/freebayes", + "reports/vcftools/freebayes/testN", + 
"reports/vcftools/freebayes/testN/testN.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/testN/testN.freebayes.filtered.TsTv.qual", + "reports/vcftools/freebayes/testT", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/testT/testT.freebayes.filtered.TsTv.qual", + "reports/vcftools/strelka", + "reports/vcftools/strelka/testN", + "reports/vcftools/strelka/testN/testN.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/testN/testN.strelka.variants.TsTv.count", + "reports/vcftools/strelka/testN/testN.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/testT", + "reports/vcftools/strelka/testT/testT.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/testT/testT.strelka.variants.TsTv.count", + "reports/vcftools/strelka/testT/testT.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/freebayes", + "variant_calling/freebayes/testN", + "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testN/testN.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/testN/testN.freebayes.vcf.gz", + "variant_calling/freebayes/testN/testN.freebayes.vcf.gz.tbi", + "variant_calling/freebayes/testT", + "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/testT/testT.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/testT/testT.freebayes.vcf.gz", + "variant_calling/freebayes/testT/testT.freebayes.vcf.gz.tbi", + "variant_calling/normalized", + "variant_calling/normalized/testN", + "variant_calling/normalized/testN/testN.freebayes.filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/testN/testN.freebayes.filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/testN/testN.strelka.variants.norm.sorted.vcf.gz", + 
"variant_calling/normalized/testN/testN.strelka.variants.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/testT", + "variant_calling/normalized/testT/testT.freebayes.filtered.norm.sorted.vcf.gz", + "variant_calling/normalized/testT/testT.freebayes.filtered.norm.sorted.vcf.gz.tbi", + "variant_calling/normalized/testT/testT.strelka.variants.norm.sorted.vcf.gz", + "variant_calling/normalized/testT/testT.strelka.variants.norm.sorted.vcf.gz.tbi", + "variant_calling/strelka", + "variant_calling/strelka/testN", + "variant_calling/strelka/testN/testN.strelka.genome.vcf.gz", + "variant_calling/strelka/testN/testN.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/testN/testN.strelka.variants.vcf.gz", + "variant_calling/strelka/testN/testN.strelka.variants.vcf.gz.tbi", + "variant_calling/strelka/testT", + "variant_calling/strelka/testT/testT.strelka.genome.vcf.gz", + "variant_calling/strelka/testT/testT.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/testT/testT.strelka.variants.vcf.gz", + "variant_calling/strelka/testT/testT.strelka.variants.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,9cfd38310d4da59c7242b2a7ae6bd651", + "mosdepth-cumcoverage-dist-id.txt:md5,f0177ed551b6ec930854d0d221904ec0", + "mosdepth_perchrom.txt:md5,9cfd38310d4da59c7242b2a7ae6bd651", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,a0c101497a0566f60b9e8388207944ab", + "samtools_alignment_plot.txt:md5,563784066de81f3bc8ccf5fbbe82b3d5", + "testN.freebayes.filtered.bcftools_stats.txt:md5,96c5e6b9e55f1bb67fca7886fb322b67", + "testT.freebayes.filtered.bcftools_stats.txt:md5,ce8b7afaace836b6a00bd8ccdc980500", + "testN.strelka.variants.bcftools_stats.txt:md5,f11c9e5e9820868809b5465970c7cc06", + "testT.strelka.variants.bcftools_stats.txt:md5,ac070b6cf4de7540a227265971244b31", + "testN.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + 
"testN.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "testN.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "testN.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "testN.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "testT.recal.mosdepth.global.dist.txt:md5,3106c114529adc4231badeb3bb38b6d1", + "testT.recal.mosdepth.region.dist.txt:md5,ccf646922b05cb4759c4f89072be2b69", + "testT.recal.mosdepth.summary.txt:md5,024649a659caff330dfbef4ac3560542", + "testT.recal.regions.bed.gz:md5,14b36a2cf428840aab471f95cfbe399f", + "testT.recal.regions.bed.gz.csi:md5,0dc011f3344841dc14aa488da905e917", + "testN.freebayes.filtered.FILTER.summary:md5,e2f8e86fb18631c0bf6ccd7d9d4039c8", + "testN.freebayes.filtered.TsTv.count:md5,89562fef808b5c3db629682d36fd86fc", + "testT.freebayes.filtered.FILTER.summary:md5,4b25443c427fd27761017e4f3a556b29", + "testT.freebayes.filtered.TsTv.count:md5,803d74e40f7716202bae2a3a81c1ddfc", + "testN.strelka.variants.FILTER.summary:md5,39ff2cc8eb7495a14a6b76e0ab627027", + "testN.strelka.variants.TsTv.count:md5,ee7dafc8d941b8502a04a63dc3126fff", + "testT.strelka.variants.FILTER.summary:md5,ad4c8982a91cef84d0c1ff827a0ffcf3", + "testT.strelka.variants.TsTv.count:md5,a32d1781bd32b81ec18c6e0a191c0efe" + ], + "No BAM files", + "No CRAM files", + [ + [ + "testN.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=7, phased=false, phasedAutodetect=false]" + ], + [ + "testT.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=10, phased=false, phasedAutodetect=false]" + ] + ], + [ + "testN.freebayes.filtered.vcf.gz:md5,7a08d55c866e45d635cf378825ddba74", + "testT.freebayes.filtered.vcf.gz:md5,dc1b49ffa2401b2a5193a0527bf8cd0a", + "testN.freebayes.filtered.norm.sorted.vcf.gz:md5,18992a755b06d72374bb072cc8af86f9", + "testN.strelka.variants.norm.sorted.vcf.gz:md5,ba9aabddec39a8bcbdb38c46f7a26515", + 
"testT.freebayes.filtered.norm.sorted.vcf.gz:md5,574d3d9e73986d07839d5e720c3ea929", + "testT.strelka.variants.norm.sorted.vcf.gz:md5,2dd9f7f6dac9c10afcf01f148a486799", + "testN.strelka.genome.vcf.gz:md5,325ea84ccf1ec1cf7a6c5c4aae1c0d1", + "testN.strelka.variants.vcf.gz:md5,3a3b3e67614ec91f4f0b88999dff454b", + "testT.strelka.genome.vcf.gz:md5,ece653410947a3ace10296c2cc5ff095", + "testT.strelka.variants.vcf.gz:md5,b1cf29a73f6b423cf26ab1a71847c9b" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-03T11:48:18.652411" + } +} diff --git a/tests/postprocess_varlociraptor.nf.test b/tests/postprocess_varlociraptor.nf.test new file mode 100644 index 0000000000..88699e9fd0 --- /dev/null +++ b/tests/postprocess_varlociraptor.nf.test @@ -0,0 +1,93 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + profile "test,mutect" + + def test_scenario = [ + [ + name: "-profile test --tools strelka,varlociraptor --input recalibrated_germline.csv", + params: [ + input: "${projectDir}/tests/csv/3.0/recalibrated_germline.csv", + genome: null, + igenomes_ignore: true, + chr_dir: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz', + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + germline_resource: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz', + germline_resource_tbi: 
modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + pon: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz', + pon_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz.tbi', + ngscheckmate_bed: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/SNP_GRCh38_hg38_wChr.bed', + nucleotides_per_second: 20, + step: 'variant_calling', + tools: 'strelka,varlociraptor', + wes: true + ], + ignoreFiles: '**/*.varlociraptor.{vcf}{,.gz}', + include_varlociraptor_vcf: true + ], + [ + name: "-profile test --tools strelka,varlociraptor --input recalibrated_somatic.csv", + params: [ + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + genome: null, + igenomes_ignore: true, + chr_dir: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz', + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + germline_resource: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz', + germline_resource_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + pon: modules_testdata_base_path + 
'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz', + pon_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz.tbi', + ngscheckmate_bed: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/SNP_GRCh38_hg38_wChr.bed', + nucleotides_per_second: 20, + step: 'variant_calling', + tools: 'strelka,varlociraptor', + wes: true + ], + ignoreFiles: '**/*.varlociraptor.{vcf}{,.gz}', + include_varlociraptor_vcf: true + ], + [ + name: "-profile test --tools mutect2,varlociraptor --input recalibrated_tumoronly.csv", + params: [ + input: "${projectDir}/tests/csv/3.0/recalibrated_tumoronly.csv", + genome: null, + igenomes_ignore: true, + chr_dir: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz', + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + germline_resource: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz', + germline_resource_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + pon: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz', + pon_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz.tbi', + ngscheckmate_bed: modules_testdata_base_path + 
'genomics/homo_sapiens/genome/chr21/germlineresources/SNP_GRCh38_hg38_wChr.bed', + bcftools_annotations_tbi: null, + bcftools_annotations: null, + nucleotides_per_second: 20, + step: 'variant_calling', + tools: 'mutect2,varlociraptor', + wes: true + ], + ignoreFiles: '**/*.varlociraptor.{vcf}{,.gz}', + include_varlociraptor_vcf: true + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/postprocess_varlociraptor.nf.test.snap b/tests/postprocess_varlociraptor.nf.test.snap new file mode 100644 index 0000000000..88c66534e7 --- /dev/null +++ b/tests/postprocess_varlociraptor.nf.test.snap @@ -0,0 +1,791 @@ +{ + "-profile test --tools strelka,varlociraptor --input recalibrated_somatic.csv": { + "content": [ + 151, + { + "ALIGNMENTPROPERTIES_NORMAL": { + "varlociraptor": "8.9.3" + }, + "ALIGNMENTPROPERTIES_TUMOR": { + "varlociraptor": "8.9.3" + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CONCAT_CALLED_CHUNKS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FILL_SCENARIO_FILE": { + "yte": "1.9.4" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "MERGE_GERMLINE_SOMATIC_VCFS": { + "bcftools": 1.22 + }, + "MERGE_STRELKA": { + "gatk4": "4.6.1.0" + }, + "MERGE_STRELKA_GENOME": { + "gatk4": "4.6.1.0" + }, + "MERGE_STRELKA_INDELS": { + "gatk4": "4.6.1.0" + }, + "MERGE_STRELKA_SNVS": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "PREPROCESS_NORMAL": { + "varlociraptor": "8.9.3" + }, + "PREPROCESS_TUMOR": { + "varlociraptor": "8.9.3" + }, + "RBT_VCFSPLIT": { + "rbt": "0.42.2" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SORT_CALLED_CHUNKS": { + "bcftools": 1.21 + }, + "SORT_FINAL_VCF": { + "bcftools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "STRELKA_SOMATIC": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + 
"TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_GERMLINE": { + "tabix": 1.21 + }, + "TABIX_SOMATIC": { + "tabix": 1.21 + }, + "VARLOCIRAPTOR_CALLVARIANTS": { + "varlociraptor": "8.9.3" + }, + "VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES": { + "varlociraptor": "8.9.3" + }, + "VARLOCIRAPTOR_PREPROCESS": { + "varlociraptor": "8.9.3" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + 
"multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + 
"multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/strelka", + "reports/bcftools/strelka/sample3", + "reports/bcftools/strelka/sample3/sample3.strelka.variants.bcftools_stats.txt", + "reports/bcftools/strelka/sample4_vs_sample3", + "reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt", + 
"reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/sample3", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.count", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/sample4_vs_sample3", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual", + 
"reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/sample3", + "variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz", + "variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz", + "variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz.tbi", + "variant_calling/strelka/sample4_vs_sample3", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz.tbi", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz.tbi", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.vcf.gz", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.vcf.gz.tbi", + "variant_calling/varlociraptor", + "variant_calling/varlociraptor/sample3", + "variant_calling/varlociraptor/sample3/sample3.alignment-properties.json", + "variant_calling/varlociraptor/sample3/sample3.scenario.varlociraptor.yaml", + "variant_calling/varlociraptor/sample3/sample3.strelka.germline.varlociraptor.vcf.gz", + "variant_calling/varlociraptor/sample3/sample3.strelka.germline.varlociraptor.vcf.gz.tbi", + "variant_calling/varlociraptor/sample4_vs_sample3", + "variant_calling/varlociraptor/sample4_vs_sample3/sample3.normal.alignment-properties.json", + "variant_calling/varlociraptor/sample4_vs_sample3/sample4.tumor.alignment-properties.json", + 
"variant_calling/varlociraptor/sample4_vs_sample3/sample4_vs_sample3.scenario.varlociraptor.yaml", + "variant_calling/varlociraptor/sample4_vs_sample3/sample4_vs_sample3.strelka.merged.vcf.gz", + "variant_calling/varlociraptor/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic.varlociraptor.vcf.gz", + "variant_calling/varlociraptor/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic.varlociraptor.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,4ee05f71086179b42a01cd2fb450346f", + "mosdepth-cumcoverage-dist-id.txt:md5,cb7468f51b8be1230fb3ac5b130be31f", + "mosdepth_perchrom.txt:md5,4ee05f71086179b42a01cd2fb450346f", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,e0a8d8867064083908a8ca1ea782d7ac", + "samtools_alignment_plot.txt:md5,f4b1a7cef760291172144a8614b4a1cd", + "sample3.strelka.variants.bcftools_stats.txt:md5,6d4d032ba146941cb226765aaed9d67f", + "sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt:md5,62c6123f6494c3cdbd42dc7230e757b3", + "sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt:md5,8404ea88658fbc41d447ba20bf46dd0a", + "sample3.recal.mosdepth.global.dist.txt:md5,69e29702ef01fd8f6c7a5468fc35a16a", + "sample3.recal.mosdepth.region.dist.txt:md5,6ec49cd7d510c2eb3d9d90fdb79b783a", + "sample3.recal.mosdepth.summary.txt:md5,103098d0bf76ed82d2b87d5f242b099a", + "sample3.recal.per-base.bed.gz:md5,297f96648928d0ca5184223fb9941e7c", + "sample3.recal.per-base.bed.gz.csi:md5,c67dcd711b096eb42f43784d5eadbc0d", + "sample3.recal.regions.bed.gz:md5,314ce8d7273eff353072108aa77c327c", + "sample3.recal.regions.bed.gz.csi:md5,9cb0ad7039a3b703d16ca7d5b835c0ee", + "sample4.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample4.recal.mosdepth.region.dist.txt:md5,39005ffaac22871ffaaf19656fe69c5b", + "sample4.recal.mosdepth.summary.txt:md5,68d4b98f17361fddf73052ead34fa370", + "sample4.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + 
"sample4.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample4.recal.regions.bed.gz:md5,b7561bc56a955f7db0f11e67e2ec0386", + "sample4.recal.regions.bed.gz.csi:md5,393c2749068304d8545b501b9d4658e4", + "sample3.strelka.variants.FILTER.summary:md5,2048a5de0201a6052c988a0189979a5f", + "sample3.strelka.variants.TsTv.count:md5,c5b7a8eda2526d899098439ae4c06a49", + "sample4_vs_sample3.strelka.somatic_indels.FILTER.summary:md5,3441628cd6550ed459ca1c3db989ceea", + "sample4_vs_sample3.strelka.somatic_indels.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f", + "sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary:md5,4fc17fa5625b4d1dcc5d791b1eb22d85", + "sample4_vs_sample3.strelka.somatic_snvs.TsTv.count:md5,fc7af1f534890c4ad3025588b3af62ae", + "sample3.scenario.varlociraptor.yaml:md5,bdc995369d2dede79535e551530c0cf6", + "sample4_vs_sample3.scenario.varlociraptor.yaml:md5,c8097a6ac7cf61639673b87c0a97eb6d" + ], + "No BAM files", + "No CRAM files", + [ + "sample3.strelka.genome.vcf.gz:md5,9f77528d7bb6c4b2c09eacf71a716439", + "sample3.strelka.variants.vcf.gz:md5,5af34e7b632f604dc7a224f977fc2898", + "sample4_vs_sample3.strelka.somatic_indels.vcf.gz:md5,93000b399c48612413fdd5186f133933", + "sample4_vs_sample3.strelka.somatic_snvs.vcf.gz:md5,c1cfe002802bb3997e485f2e7d15f494", + "sample4_vs_sample3.strelka.vcf.gz:md5,2e7972a74fac28cc31844e3ea6eb7fdb", + "sample4_vs_sample3.strelka.merged.vcf.gz:md5,ecbb04bc2a4f802cac75f232d263ebe8" + ], + [ + "sample3.strelka.germline.varlociraptor.vcf.gz:summary,VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=53245, phased=true, phasedAutodetect=false]", + "sample4_vs_sample3.strelka.somatic.varlociraptor.vcf.gz:summary,VcfFile [chromosomes=[chr21], sampleCount=2, variantCount=54634, phased=true, phasedAutodetect=false]" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2025-11-02T16:48:49.54626" + }, + "-profile test --tools mutect2,varlociraptor --input 
recalibrated_tumoronly.csv": { + "content": [ + 72, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CALCULATECONTAMINATION": { + "gatk4": "4.6.1.0" + }, + "CONCAT_CALLED_CHUNKS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FILL_SCENARIO_FILE": { + "yte": "1.9.4" + }, + "FILTERMUTECTCALLS": { + "gatk4": "4.6.1.0" + }, + "GATHERPILEUPSUMMARIES": { + "gatk4": "4.6.1.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "GETPILEUPSUMMARIES": { + "gatk4": "4.6.1.0" + }, + "LEARNREADORIENTATIONMODEL": { + "gatk4": "4.6.1.0" + }, + "MERGEMUTECTSTATS": { + "gatk4": "4.6.1.0" + }, + "MERGE_MUTECT2": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "MUTECT2": { + "gatk4": "4.6.1.0" + }, + "RBT_VCFSPLIT": { + "rbt": "0.42.2" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SORT_CALLED_CHUNKS": { + "bcftools": 1.21 + }, + "SORT_FINAL_VCF": { + "bcftools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VARLOCIRAPTOR_CALLVARIANTS": { + "varlociraptor": "8.9.3" + }, + "VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES": { + "varlociraptor": "8.9.3" + }, + "VARLOCIRAPTOR_PREPROCESS": { + "varlociraptor": "8.9.3" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + 
"multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + 
"multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + 
"multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/mutect2", + "reports/bcftools/mutect2/sample2", + "reports/bcftools/mutect2/sample2/sample2.mutect2.filtered.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/mutect2", + "reports/vcftools/mutect2/sample2", + "reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.FILTER.summary", + "reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.TsTv.count", + "reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.TsTv.qual", + "variant_calling", + "variant_calling/mutect2", + "variant_calling/mutect2/sample2", + "variant_calling/mutect2/sample2/sample2.mutect2.artifactprior.tar.gz", + "variant_calling/mutect2/sample2/sample2.mutect2.contamination.table", + "variant_calling/mutect2/sample2/sample2.mutect2.filtered.vcf.gz", + "variant_calling/mutect2/sample2/sample2.mutect2.filtered.vcf.gz.filteringStats.tsv", + "variant_calling/mutect2/sample2/sample2.mutect2.filtered.vcf.gz.tbi", + 
"variant_calling/mutect2/sample2/sample2.mutect2.pileups.table", + "variant_calling/mutect2/sample2/sample2.mutect2.segmentation.table", + "variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz", + "variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz.stats", + "variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz.tbi", + "variant_calling/varlociraptor", + "variant_calling/varlociraptor/sample2", + "variant_calling/varlociraptor/sample2/sample2.alignment-properties.json", + "variant_calling/varlociraptor/sample2/sample2.mutect2.tumor_only.varlociraptor.vcf.gz", + "variant_calling/varlociraptor/sample2/sample2.mutect2.tumor_only.varlociraptor.vcf.gz.tbi", + "variant_calling/varlociraptor/sample2/sample2.scenario.varlociraptor.yaml" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,1f24f2f40467234c410a8bda544a8aae", + "mosdepth-cumcoverage-dist-id.txt:md5,1036ea76acae803f591fd99838a8eded", + "mosdepth_perchrom.txt:md5,1f24f2f40467234c410a8bda544a8aae", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", + "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", + "sample2.mutect2.filtered.bcftools_stats.txt:md5,5327cede1f3ad2139945607f66264928", + "sample2.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample2.recal.mosdepth.region.dist.txt:md5,39005ffaac22871ffaaf19656fe69c5b", + "sample2.recal.mosdepth.summary.txt:md5,68d4b98f17361fddf73052ead34fa370", + "sample2.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample2.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample2.recal.regions.bed.gz:md5,b7561bc56a955f7db0f11e67e2ec0386", + "sample2.recal.regions.bed.gz.csi:md5,393c2749068304d8545b501b9d4658e4", + "sample2.mutect2.filtered.FILTER.summary:md5,276c858391322083833a42e04fe3554d", + "sample2.mutect2.filtered.TsTv.count:md5,fe3ff1f0c2ead72f037552727438e00a", + 
"sample2.mutect2.contamination.table:md5,46c708c943b453da89a3da08acfdb2a7", + "sample2.mutect2.filtered.vcf.gz.filteringStats.tsv:md5,d4bfaf449c12bbf4dbda370bbb26074c", + "sample2.mutect2.pileups.table:md5,df85ceff89be6f9a13707d9cda29dd6e", + "sample2.mutect2.segmentation.table:md5,f4643d9319bde4efbfbe516d6fb13052", + "sample2.mutect2.vcf.gz.stats:md5,3cc40a35727af6c5223fb45678f3f172", + "sample2.scenario.varlociraptor.yaml:md5,41c6ed5aa1df37570bfa35acf2f0d936" + ], + "No BAM files", + "No CRAM files", + [ + "sample2.mutect2.filtered.vcf.gz:md5,245acd4882f96f2b60e63b7fce4cbc5", + "sample2.mutect2.vcf.gz:md5,a53450657afc33f2a7b87fd75bf24267" + ], + [ + "sample2.mutect2.tumor_only.varlociraptor.vcf.gz:summary,VcfFile [chromosomes=[chr21], sampleCount=2, variantCount=1258, phased=true, phasedAutodetect=false]" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2025-11-04T11:46:27.501146" + }, + "-profile test --tools strelka,varlociraptor --input recalibrated_germline.csv": { + "content": [ + 67, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CONCAT_CALLED_CHUNKS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FILL_SCENARIO_FILE": { + "yte": "1.9.4" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "MERGE_STRELKA": { + "gatk4": "4.6.1.0" + }, + "MERGE_STRELKA_GENOME": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "RBT_VCFSPLIT": { + "rbt": "0.42.2" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SORT_CALLED_CHUNKS": { + "bcftools": 1.21 + }, + "SORT_FINAL_VCF": { + "bcftools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_GERMLINE": { + "tabix": 1.21 + }, + "VARLOCIRAPTOR_CALLVARIANTS": { + "varlociraptor": "8.9.3" + }, + 
"VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES": { + "varlociraptor": "8.9.3" + }, + "VARLOCIRAPTOR_PREPROCESS": { + "varlociraptor": "8.9.3" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + 
"multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/strelka", + "reports/bcftools/strelka/sample1", + "reports/bcftools/strelka/sample1/sample1.strelka.variants.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample1", + "reports/mosdepth/sample1/sample1.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.summary.txt", + "reports/mosdepth/sample1/sample1.recal.per-base.bed.gz", + "reports/mosdepth/sample1/sample1.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample1", + "reports/samtools/sample1/sample1.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/sample1", + 
"reports/vcftools/strelka/sample1/sample1.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.count", + "reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/sample1", + "variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz", + "variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/sample1/sample1.strelka.variants.vcf.gz", + "variant_calling/strelka/sample1/sample1.strelka.variants.vcf.gz.tbi", + "variant_calling/varlociraptor", + "variant_calling/varlociraptor/sample1", + "variant_calling/varlociraptor/sample1/sample1.alignment-properties.json", + "variant_calling/varlociraptor/sample1/sample1.scenario.varlociraptor.yaml", + "variant_calling/varlociraptor/sample1/sample1.strelka.germline.varlociraptor.vcf.gz", + "variant_calling/varlociraptor/sample1/sample1.strelka.germline.varlociraptor.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,04d9eccc248633a38fb253bc70fb8d62", + "mosdepth-cumcoverage-dist-id.txt:md5,b7016944a8325bef2c0ed542246acfa9", + "mosdepth_perchrom.txt:md5,04d9eccc248633a38fb253bc70fb8d62", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,41d36c1a8413d565788a1a364b467606", + "samtools_alignment_plot.txt:md5,35f8fd2a557568b2237193f46afbab5c", + "sample1.strelka.variants.bcftools_stats.txt:md5,7d091579d450a6f6d6e6ed9795dce0cb", + "sample1.recal.mosdepth.global.dist.txt:md5,69e29702ef01fd8f6c7a5468fc35a16a", + "sample1.recal.mosdepth.region.dist.txt:md5,6ec49cd7d510c2eb3d9d90fdb79b783a", + "sample1.recal.mosdepth.summary.txt:md5,103098d0bf76ed82d2b87d5f242b099a", + "sample1.recal.per-base.bed.gz:md5,297f96648928d0ca5184223fb9941e7c", + "sample1.recal.per-base.bed.gz.csi:md5,c67dcd711b096eb42f43784d5eadbc0d", + "sample1.recal.regions.bed.gz:md5,314ce8d7273eff353072108aa77c327c", + 
"sample1.recal.regions.bed.gz.csi:md5,9cb0ad7039a3b703d16ca7d5b835c0ee", + "sample1.strelka.variants.FILTER.summary:md5,2048a5de0201a6052c988a0189979a5f", + "sample1.strelka.variants.TsTv.count:md5,c5b7a8eda2526d899098439ae4c06a49", + "sample1.scenario.varlociraptor.yaml:md5,bdc995369d2dede79535e551530c0cf6" + ], + "No BAM files", + "No CRAM files", + [ + "sample1.strelka.genome.vcf.gz:md5,9f77528d7bb6c4b2c09eacf71a716439", + "sample1.strelka.variants.vcf.gz:md5,5af34e7b632f604dc7a224f977fc2898" + ], + [ + "sample1.strelka.germline.varlociraptor.vcf.gz:summary,VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=53245, phased=true, phasedAutodetect=false]" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2025-11-01T23:04:58.277878" + } +} diff --git a/tests/qc_ngscheckmate.nf.test b/tests/qc_ngscheckmate.nf.test new file mode 100644 index 0000000000..e37e2f32eb --- /dev/null +++ b/tests/qc_ngscheckmate.nf.test @@ -0,0 +1,39 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools ngscheckmate", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated.csv", + step: "variant_calling", + tools: 'ngscheckmate' + ] + ], + [ + name: "-profile test --tools ngscheckmate -stub", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 
'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated.csv", + step: "variant_calling", + tools: 'ngscheckmate' + ], + stub: true + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/qc_ngscheckmate.nf.test.snap b/tests/qc_ngscheckmate.nf.test.snap new file mode 100644 index 0000000000..7f721c34c2 --- /dev/null +++ b/tests/qc_ngscheckmate.nf.test.snap @@ -0,0 +1,291 @@ +{ + "-profile test --tools ngscheckmate -stub": { + "content": [ + 17, + { + "BCFTOOLS_MPILEUP": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "NGSCHECKMATE_NCM": { + "ngscheckmate": "1.0.1" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/.stub", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/sample1", + "reports/mosdepth/sample1/sample1.recal.global.dist.txt", + "reports/mosdepth/sample1/sample1.recal.per-base.bed.gz", + "reports/mosdepth/sample1/sample1.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample1/sample1.recal.per-base.d4", + "reports/mosdepth/sample1/sample1.recal.quantized.bed.gz", + "reports/mosdepth/sample1/sample1.recal.quantized.bed.gz.csi", + "reports/mosdepth/sample1/sample1.recal.region.dist.txt", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz.csi", + 
"reports/mosdepth/sample1/sample1.recal.summary.txt", + "reports/mosdepth/sample1/sample1.recal.thresholds.bed.gz", + "reports/mosdepth/sample1/sample1.recal.thresholds.bed.gz.csi", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample2/sample2.recal.per-base.d4", + "reports/mosdepth/sample2/sample2.recal.quantized.bed.gz", + "reports/mosdepth/sample2/sample2.recal.quantized.bed.gz.csi", + "reports/mosdepth/sample2/sample2.recal.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/mosdepth/sample2/sample2.recal.summary.txt", + "reports/mosdepth/sample2/sample2.recal.thresholds.bed.gz", + "reports/mosdepth/sample2/sample2.recal.thresholds.bed.gz.csi", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample3/sample3.recal.per-base.d4", + "reports/mosdepth/sample3/sample3.recal.quantized.bed.gz", + "reports/mosdepth/sample3/sample3.recal.quantized.bed.gz.csi", + "reports/mosdepth/sample3/sample3.recal.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample3/sample3.recal.summary.txt", + "reports/mosdepth/sample3/sample3.recal.thresholds.bed.gz", + "reports/mosdepth/sample3/sample3.recal.thresholds.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample4/sample4.recal.per-base.d4", + 
"reports/mosdepth/sample4/sample4.recal.quantized.bed.gz", + "reports/mosdepth/sample4/sample4.recal.quantized.bed.gz.csi", + "reports/mosdepth/sample4/sample4.recal.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4/sample4.recal.summary.txt", + "reports/mosdepth/sample4/sample4.recal.thresholds.bed.gz", + "reports/mosdepth/sample4/sample4.recal.thresholds.bed.gz.csi", + "reports/ngscheckmate", + "reports/ngscheckmate/ngscheckmate.pdf", + "reports/ngscheckmate/ngscheckmate_all.txt", + "reports/ngscheckmate/ngscheckmate_matched.txt", + "reports/ngscheckmate/ngscheckmate_output_corr_matrix.txt", + "reports/ngscheckmate/vcfs", + "reports/ngscheckmate/vcfs/sample1.ngscheckmate.vcf.gz", + "reports/ngscheckmate/vcfs/sample2.ngscheckmate.vcf.gz", + "reports/ngscheckmate/vcfs/sample3.ngscheckmate.vcf.gz", + "reports/ngscheckmate/vcfs/sample4.ngscheckmate.vcf.gz", + "reports/samtools", + "reports/samtools/sample1", + "reports/samtools/sample1/sample1.recal.cram.stats", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats" + ], + [ + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." 
+ ] + ], + "timestamp": "2026-04-08T12:11:32.12215582", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.2" + } + }, + "-profile test --tools ngscheckmate": { + "content": [ + 17, + { + "BCFTOOLS_MPILEUP": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "NGSCHECKMATE_NCM": { + "ngscheckmate": "1.0.1" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + 
"multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/sample1", + "reports/mosdepth/sample1/sample1.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.summary.txt", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz.csi", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + 
"reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/ngscheckmate", + "reports/ngscheckmate/ngscheckmate.pdf", + "reports/ngscheckmate/ngscheckmate_all.txt", + "reports/ngscheckmate/ngscheckmate_matched.txt", + "reports/ngscheckmate/ngscheckmate_output_corr_matrix.txt", + "reports/ngscheckmate/vcfs", + "reports/ngscheckmate/vcfs/sample1.ngscheckmate.vcf.gz", + "reports/ngscheckmate/vcfs/sample2.ngscheckmate.vcf.gz", + "reports/ngscheckmate/vcfs/sample3.ngscheckmate.vcf.gz", + "reports/ngscheckmate/vcfs/sample4.ngscheckmate.vcf.gz", + "reports/samtools", + "reports/samtools/sample1", + "reports/samtools/sample1/sample1.recal.cram.stats", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,73ef9a077df1887f9021a581fbf207bc", + "mosdepth-cumcoverage-dist-id.txt:md5,ad0637d55d7025330f2f6cb7f9680e64", + "mosdepth_perchrom.txt:md5,73ef9a077df1887f9021a581fbf207bc", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "samtools-stats-dp.txt:md5,b446a47b182d93a9e7e74f5f7c8d41c2", + "samtools_alignment_plot.txt:md5,7138a2d29f515993e1df8d745e27b757", + "sample1.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample1.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample1.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + 
"sample1.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample1.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample2.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample2.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample2.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample2.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample2.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample3.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample3.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample3.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample3.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample3.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample4.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample4.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample4.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample4.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample4.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "ngscheckmate_all.txt:md5,f858e3eb892b8245cbf74cc979e4f33a", + "ngscheckmate_matched.txt:md5,ab2c5b46e9a4dfb3bb54292db931b58b", + "ngscheckmate_output_corr_matrix.txt:md5,a86afff85677c875503d495cbbb4f495" + ], + "No BAM files", + "No CRAM files", + [ + "sample1.ngscheckmate.vcf.gz:md5,5cffc2b460469247b346b2399dc016fd", + "sample2.ngscheckmate.vcf.gz:md5,17c1253ed56084291e919586ecf9dca", + "sample3.ngscheckmate.vcf.gz:md5,5cffc2b460469247b346b2399dc016fd", + "sample4.ngscheckmate.vcf.gz:md5,17c1253ed56084291e919586ecf9dca" + ], + "No warnings" + ], + "timestamp": "2025-09-30T21:27:36.837652254", + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + } + } +} \ No newline at end of file 
diff --git a/tests/samplesheets.nf.test b/tests/samplesheets.nf.test new file mode 100644 index 0000000000..981e368552 --- /dev/null +++ b/tests/samplesheets.nf.test @@ -0,0 +1,63 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --input tests/csv/3.0/fastq_sample_with_space.csv", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_sample_with_space.csv" + ], + failure: true, + snapshot: 'stderr', + + ], + [ + name: "-profile test --step variant_calling --input tests/csv/3.0/recalibrated_somatic_two_normal_one_sample.csv", + params: [ + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic_two_normal_one_sample.csv", + step: "variant_calling" + ], + failure: true, + snapshot: 'stderr', + + ], + [ + name: "-profile test,spark --input tests/csv/3.0/fastq_single.csv --use_gatk_spark baserecalibrator,markduplicates --save_mapped --save_output_as_bam", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_single.csv", + use_gatk_spark: 'baserecalibrator,markduplicates', + save_mapped: true, + save_output_as_bam: true + ], + failure: true, + snapshot: 'stderr', + + ], + [ + name: "-profile test --input tests/csv/3.0/fastq_multiple_lane_ids.csv", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_multiple_lane_ids.csv" + ], + failure: true, + snapshot: 'stderr', + + ], + [ + name: "-profile test --input tests/csv/3.0/fastq_multiple_sample_ids.csv", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_multiple_sample_ids.csv" + ], + failure: true, + snapshot: 'stderr', + + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/samplesheets.nf.test.snap b/tests/samplesheets.nf.test.snap new file mode 100644 index 0000000000..0247e3b1ea --- /dev/null +++ b/tests/samplesheets.nf.test.snap @@ -0,0 +1,122 @@ +{ + "-profile test --step variant_calling --input 
tests/csv/3.0/recalibrated_somatic_two_normal_one_sample.csv": { + "content": [ + [ + "csv", + "pipeline_info" + ], + "No stable content", + "No BAM files", + "No CRAM files", + "No VCF files", + [ + "WARN: Failed to render execution report -- see the log file for details", + "WARN: Failed to render execution timeline -- see the log file for details" + ], + [ + "Patient [test] has more than one sample [2] with normal status [0] and one sample with tumor status [1]." + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-06T13:52:36.729578236" + }, + "-profile test --input tests/csv/3.0/fastq_sample_with_space.csv": { + "content": [ + [ + "pipeline_info" + ], + "No stable content", + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings", + [ + "The following invalid input values have been detected:", + "* --input ([PATH]/./tests/csv/3.0/fastq_sample_with_space.csv): Validation of file failed:", + "\t-> Entry 2: Error for field 'sample' (test 2): \"test 2\" does not match regular expression [^\\S+$] (Sample ID must be provided, cannot contain spaces and must be a string value)", + " -- Check script '[PATH]/subworkflows/nf-core/utils_nfschema_plugin/main.nf' at line: 68 or see '[PATH]/tests/[NFT_HASH]/meta/nextflow.log' file for more details" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-16T12:15:16.691338949" + }, + "-profile test --input tests/csv/3.0/fastq_multiple_sample_ids.csv": { + "content": [ + [ + "csv", + "pipeline_info" + ], + "No stable content", + "No BAM files", + "No CRAM files", + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1", + "WARN: Failed to render execution report -- see the log file for details", + "WARN: Failed to render execution timeline -- see the log file for details" + ], + [ + "Sample ID 'test' is 
associated with multiple patient IDs: test, test2. Please ensure each sample ID is unique to a single patient." + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-06T13:54:49.322255498" + }, + "-profile test --input tests/csv/3.0/fastq_multiple_lane_ids.csv": { + "content": [ + [ + "csv", + "pipeline_info" + ], + "No stable content", + "No BAM files", + "No CRAM files", + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1", + "WARN: Failed to render execution report -- see the log file for details", + "WARN: Failed to render execution timeline -- see the log file for details" + ], + [ + "Duplicate patient-sample-status-lane combination found: Patient 'test2', Sample 'test2', Status '0', Lane 'test_L1' appears 2 times. Please ensure each combination is unique." + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-06T13:54:04.695361045" + }, + "-profile test,spark --input tests/csv/3.0/fastq_single.csv --use_gatk_spark baserecalibrator,markduplicates --save_mapped --save_output_as_bam": { + "content": [ + [ + "pipeline_info" + ], + "No stable content", + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings", + [ + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", + " The --use_gatk_spark option is not compatible with --save_mapped and --save_output_as_bam.", + " If you want to save your bam files please swap to the normal gatk implementation.", + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-16T12:16:38.014556073" + } +} \ No newline at end of file diff --git a/tests/save_mapped.nf.test b/tests/save_mapped.nf.test new file mode 100644 index 0000000000..5dbb0353a2 --- 
/dev/null +++ b/tests/save_mapped.nf.test @@ -0,0 +1,21 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --save_mapped skip QC/recal/md", + params: [ + save_mapped: true, + skip_tools: 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools', + tools: '' + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/save_mapped.nf.test.snap b/tests/save_mapped.nf.test.snap new file mode 100644 index 0000000000..6f0bb2bc3c --- /dev/null +++ b/tests/save_mapped.nf.test.snap @@ -0,0 +1,65 @@ +{ + "-profile test --save_mapped skip QC/recal/md": { + "content": [ + 10, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_BAM": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "reference" + ], + "No stable content", + "No BAM files", + [ + "test.sorted.cram:md5,5534c350547fd253f0f2b9450362bed" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": 
"2025-12-15T21:27:11.108509" + } +} diff --git a/tests/save_output_as_bam.nf.test b/tests/save_output_as_bam.nf.test new file mode 100644 index 0000000000..f0dc6c637d --- /dev/null +++ b/tests/save_output_as_bam.nf.test @@ -0,0 +1,21 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --save_output_as_bam skip QC/recal/md", + params: [ + save_output_as_bam: true, + skip_tools: 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools', + tools: '' + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/save_output_as_bam.nf.test.snap b/tests/save_output_as_bam.nf.test.snap new file mode 100644 index 0000000000..6fc4012969 --- /dev/null +++ b/tests/save_output_as_bam.nf.test.snap @@ -0,0 +1,65 @@ +{ + "-profile test --save_output_as_bam skip QC/recal/md": { + "content": [ + 10, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_BAM": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.bam", + "preprocessing/mapped/test/test.sorted.bam.bai", + "reference" + ], + "No stable content", + [ + "test.sorted.bam:md5,5534c350547fd253f0f2b9450362bed" + ], + "No CRAM files", + "No VCF files", + [ + "WARN: FASTQ 
file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T21:28:27.476637665" + } +} diff --git a/tests/sentieon.nf.test b/tests/sentieon.nf.test new file mode 100644 index 0000000000..8d3efa5e51 --- /dev/null +++ b/tests/sentieon.nf.test @@ -0,0 +1,22 @@ +nextflow_pipeline { + + name "Test sentieon" + script "../main.nf" + tag "sentieon" + + def test_scenario = [ + [ + name: "-profile test --aligner sentieon-bwamem --tools null -stub", + params: [ + aligner: "sentieon-bwamem", + tools: '' + ], + stub: true + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/sentieon.nf.test.snap b/tests/sentieon.nf.test.snap new file mode 100644 index 0000000000..859fee2440 --- /dev/null +++ b/tests/sentieon.nf.test.snap @@ -0,0 +1,128 @@ +{ + "-profile test --aligner sentieon-bwamem --tools null -stub": { + "content": [ + 18, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SENTIEON_BWAMEM": { + "bwa": "0.7.17-r1188", + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + 
"multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/.stub", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.cram", + "preprocessing/markduplicates/test/test.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1.html", + "reports/fastqc/test-test_L1/test-test_L1.zip", + "reports/fastqc/test-test_L2", + "reports/fastqc/test-test_L2/test-test_L2.html", + "reports/fastqc/test-test_L2/test-test_L2.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.global.dist.txt", + "reports/mosdepth/test/test.md.per-base.bed.gz", + "reports/mosdepth/test/test.md.per-base.bed.gz.csi", + "reports/mosdepth/test/test.md.per-base.d4", + "reports/mosdepth/test/test.md.quantized.bed.gz", + "reports/mosdepth/test/test.md.quantized.bed.gz.csi", + "reports/mosdepth/test/test.md.region.dist.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.md.summary.txt", + "reports/mosdepth/test/test.md.thresholds.bed.gz", + "reports/mosdepth/test/test.md.thresholds.bed.gz.csi", + "reports/mosdepth/test/test.recal.global.dist.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + 
"reports/mosdepth/test/test.recal.per-base.d4", + "reports/mosdepth/test/test.recal.quantized.bed.gz", + "reports/mosdepth/test/test.recal.quantized.bed.gz.csi", + "reports/mosdepth/test/test.recal.region.dist.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.summary.txt", + "reports/mosdepth/test/test.recal.thresholds.bed.gz", + "reports/mosdepth/test/test.recal.thresholds.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1", + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." + ] + ], + "timestamp": "2026-04-08T12:12:40.530730643", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.2" + } + } +} \ No newline at end of file diff --git a/tests/sentieon_aligner_bwamem.nf.test b/tests/sentieon_aligner_bwamem.nf.test new file mode 100644 index 0000000000..e1c800b01c --- /dev/null +++ b/tests/sentieon_aligner_bwamem.nf.test @@ -0,0 +1,48 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + tag "sentieon" + + def test_scenario = [ + [ + name: "-profile test --aligner sentieon-bwamem --save_reference skip QC/recal/md", + params: [ + aligner: 'sentieon-bwamem', + save_reference: true, + skip_tools: 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools', + tools: '' + ] + ], + [ + name: "-profile test --aligner sentieon-bwamem --save_reference --build_only_index", + params: [ + aligner: 'sentieon-bwamem', + build_only_index: true, + input: false, + save_reference: true, + skip_tools: 'multiqc', + tools: '' + ] + ], + [ + name: "-profile test --input 
fastq_umi.csv --aligner sentieon-bwamem --umi_location read2 --umi_length 7 --umi_base_skip 1 --tools sentieon_dedup", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_umi.csv", + aligner: 'sentieon-bwamem', + skip_tools: 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools', + tools: 'sentieon_dedup', + umi_length: 7, + umi_base_skip: 1, + umi_location: 'read2' + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/sentieon_aligner_bwamem.nf.test.snap b/tests/sentieon_aligner_bwamem.nf.test.snap new file mode 100644 index 0000000000..b93b0f2a60 --- /dev/null +++ b/tests/sentieon_aligner_bwamem.nf.test.snap @@ -0,0 +1,222 @@ +{ + "-profile test --aligner sentieon-bwamem --save_reference skip QC/recal/md": { + "content": [ + 10, + { + "BAM_TO_CRAM_MAPPING": { + "samtools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_MERGE_BAM": { + "samtools": 1.21 + }, + "MERGE_BAM": { + "samtools": 1.21 + }, + "SENTIEON_BWAMEM": { + "bwa": "0.7.17-r1188", + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/mapped.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/mapped", + "preprocessing/mapped/test", + "preprocessing/mapped/test/test.sorted.cram", + "preprocessing/mapped/test/test.sorted.cram.crai", + "reference", + "reference/bwa", + "reference/bwa/genome.amb", + "reference/bwa/genome.ann", + "reference/bwa/genome.bwt", + "reference/bwa/genome.pac", + "reference/bwa/genome.sa", + "reference/intervals", + "reference/intervals/chr22_1-40001.bed", + 
"reference/intervals/chr22_1-40001.bed.gz", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz" + ], + [ + "genome.amb:md5,1891c1de381b3a96d4e72f590fde20c1", + "genome.ann:md5,2df4aa2d7580639fa0fcdbcad5e2e969", + "genome.bwt:md5,815eded87e4cb6b0f1daab5c4d6e30af", + "genome.pac:md5,8569fbdb2c98c6fb16dfa73d8eacb070", + "genome.sa:md5,e7cff62b919448a3a3d0fe4aaf427594", + "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e" + ], + "No BAM files", + [ + "test.sorted.cram:md5,a9a4d2edba1b98ff734869b65fb437a7" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-06-21T10:32:56.848655102" + }, + "-profile test --input fastq_umi.csv --aligner sentieon-bwamem --umi_location read2 --umi_length 7 --umi_base_skip 1 --tools sentieon_dedup": { + "content": [ + 9, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTP": { + "fastp": "0.24.0" + }, + "FGBIO_COPYUMIFROMREADNAME": { + "fgbio": "2.4.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "SENTIEON_BWAMEM": { + "bwa": "0.7.17-r1188", + "sentieon": 202503.01 + }, + "SENTIEON_DEDUP": { + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates_no_table.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/sentieon_dedup", + "preprocessing/sentieon_dedup/test", + 
"preprocessing/sentieon_dedup/test/test.dedup.cram", + "preprocessing/sentieon_dedup/test/test.dedup.cram.crai", + "reference", + "reports", + "reports/fastp", + "reports/fastp/test", + "reports/fastp/test/test-test_L1.fastp.html", + "reports/fastp/test/test-test_L1.fastp.json", + "reports/fastp/test/test-test_L1.fastp.log", + "reports/sentieon_dedup", + "reports/sentieon_dedup/test", + "reports/sentieon_dedup/test/test.dedup.cram.metrics", + "reports/sentieon_dedup/test/test.dedup.cram.metrics.multiqc.tsv" + ], + [ + "test-test_L1.fastp.json:md5,d9107ec414e44408d0698b167cc445f9" + ], + "No BAM files", + [ + "test.dedup.cram:md5,4b2f03b0b0b2ae2d2f208b3dd39c05dd" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_umi_cleanname_1.fastq.gz): Cannot extract flowcell ID from @922332" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-08-18T06:00:05.261761108" + }, + "-profile test --aligner sentieon-bwamem --save_reference --build_only_index": { + "content": [ + 5, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/bwa", + "reference/bwa/genome.amb", + "reference/bwa/genome.ann", + "reference/bwa/genome.bwt", + "reference/bwa/genome.pac", + "reference/bwa/genome.sa", + "reference/intervals", + "reference/intervals/chr22_1-40001.bed", + "reference/intervals/chr22_1-40001.bed.gz", + "reference/intervals/genome.bed", + "reference/intervals/genome.bed.gz" + ], + [ + "genome.amb:md5,1891c1de381b3a96d4e72f590fde20c1", + 
"genome.ann:md5,2df4aa2d7580639fa0fcdbcad5e2e969", + "genome.bwt:md5,815eded87e4cb6b0f1daab5c4d6e30af", + "genome.pac:md5,8569fbdb2c98c6fb16dfa73d8eacb070", + "genome.sa:md5,e7cff62b919448a3a3d0fe4aaf427594", + "chr22_1-40001.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "chr22_1-40001.bed.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7", + "genome.bed:md5,a87dc7d20ebca626f65cc16ff6c97a3e", + "genome.bed.gz:md5,a87dc7d20ebca626f65cc16ff6c97a3e" + ], + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-06-21T10:38:04.521781501" + } +} diff --git a/tests/sentieon_dedup.nf.test b/tests/sentieon_dedup.nf.test new file mode 100644 index 0000000000..3089d885f1 --- /dev/null +++ b/tests/sentieon_dedup.nf.test @@ -0,0 +1,60 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + tag "sentieon" + + def test_scenario = [ + [ + name: "-profile test --tools sentieon_dedup --step markduplicates --input tests/csv/3.0/mapped_single_bam.csv", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_bam.csv", + step: 'markduplicates', + tools: 'sentieon_dedup' + ] + ], + [ + name: "-profile test --tools sentieon_dedup --step markduplicates --input tests/csv/3.0/mapped_single_cram.csv", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_cram.csv", + step: 'markduplicates', + tools: 'sentieon_dedup' + ] + ], + [ + name: "-profile test --tools sentieon_dedup --step markduplicates --input tests/csv/3.0/mapped_single_cram.csv --sentieon_consensus", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_cram.csv", + step: 'markduplicates', + tools: 'sentieon_dedup', + sentieon_consensus: true + ] + ], + [ + name: "-profile test --tools sentieon_dedup --step markduplicates --input tests/csv/3.0/bam_umi_header.csv --umi_in_read_header", + params: [ + input: 
"${projectDir}/tests/csv/3.0/bam_umi_header.csv", + step: 'markduplicates', + tools: 'sentieon_dedup', + umi_in_read_header: true + ] + ], + [ + name: "-profile test --tools sentieon_dedup --step markduplicates --input tests/csv/3.0/bam_umi_header.csv --sentieon_consensus --umi_in_read_header", + params: [ + input: "${projectDir}/tests/csv/3.0/bam_umi_header.csv", + step: 'markduplicates', + tools: 'sentieon_dedup', + sentieon_consensus: true, + umi_in_read_header: true + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/sentieon_dedup.nf.test.snap b/tests/sentieon_dedup.nf.test.snap new file mode 100644 index 0000000000..16037761e5 --- /dev/null +++ b/tests/sentieon_dedup.nf.test.snap @@ -0,0 +1,938 @@ +{ + "-profile test --tools sentieon_dedup --step markduplicates --input tests/csv/3.0/mapped_single_bam.csv": { + "content": [ + 13, + { + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SENTIEON_DEDUP": { + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + 
"multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_dedup_metrics.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/dedup_metrics.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", 
+ "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/dedup_metrics.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/dedup_metrics.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", 
+ "preprocessing", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "preprocessing/sentieon_dedup", + "preprocessing/sentieon_dedup/test", + "preprocessing/sentieon_dedup/test/test.dedup.cram", + "preprocessing/sentieon_dedup/test/test.dedup.cram.crai", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.dedup.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.dedup.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.dedup.mosdepth.summary.txt", + "reports/mosdepth/test/test.dedup.regions.bed.gz", + "reports/mosdepth/test/test.dedup.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.dedup.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/sentieon_dedup", + "reports/sentieon_dedup/test", + "reports/sentieon_dedup/test/test.dedup.cram.metrics", + "reports/sentieon_dedup/test/test.dedup.cram.metrics.multiqc.tsv" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,2a082d53dcf3a854f46d5af6c931d134", + "mosdepth-cumcoverage-dist-id.txt:md5,7e97a5d607e949529a7bc8e92c6c541e", + "mosdepth_perchrom.txt:md5,2a082d53dcf3a854f46d5af6c931d134", + "multiqc_citations.txt:md5,7d0b4b866fa577272c48a1f3ad72e75d", + "multiqc_dedup_metrics.txt:md5,4756b5e5f80faca763179470951ef0f3", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,b20f475e03ef81a68b3dec99b7e6aae4", + "samtools_alignment_plot.txt:md5,b693942009526869bd21cbc42d54f53c", + "test.dedup.mosdepth.global.dist.txt:md5,8e875e20e3fb9cf288d68c1d223f6fd5", + "test.dedup.mosdepth.region.dist.txt:md5,75e1ce7e55af51f4985fa91654a5ea2d", + "test.dedup.mosdepth.summary.txt:md5,b23cf96942b2ada3f41172a9349a1175", + "test.dedup.regions.bed.gz:md5,74cd0c779c7b3228adcf3b177333886a", + "test.dedup.regions.bed.gz.csi:md5,0dc011f3344841dc14aa488da905e917", + "test.recal.mosdepth.global.dist.txt:md5,8e875e20e3fb9cf288d68c1d223f6fd5", + "test.recal.mosdepth.region.dist.txt:md5,75e1ce7e55af51f4985fa91654a5ea2d", + "test.recal.mosdepth.summary.txt:md5,b23cf96942b2ada3f41172a9349a1175", + "test.recal.regions.bed.gz:md5,74cd0c779c7b3228adcf3b177333886a", + "test.recal.regions.bed.gz.csi:md5,0dc011f3344841dc14aa488da905e917" + ], + "No BAM files", + [ + "test.recal.cram:md5,463ac3b905fbf4ddf113a94dbfa8d69f", + "test.dedup.cram:md5,2f11e4fe3390b8ad0a1852616fd1da04" + ], + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-06-21T10:54:13.719485962" + }, + "-profile test --tools sentieon_dedup --step markduplicates --input tests/csv/3.0/mapped_single_cram.csv --sentieon_consensus": { + "content": [ + 13, + { + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SENTIEON_DEDUP": { + "sentieon": 202503.01 + }, + 
"TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_dedup_metrics.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/dedup_metrics.pdf", + 
"multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/dedup_metrics.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/dedup_metrics.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + 
"multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "preprocessing/sentieon_consensus", + "preprocessing/sentieon_consensus/test", + "preprocessing/sentieon_consensus/test/test.consensus.cram", + "preprocessing/sentieon_consensus/test/test.consensus.cram.crai", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.dedup.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.dedup.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.dedup.mosdepth.summary.txt", + "reports/mosdepth/test/test.dedup.regions.bed.gz", + "reports/mosdepth/test/test.dedup.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.dedup.cram.stats", 
+ "reports/samtools/test/test.recal.cram.stats", + "reports/sentieon_dedup", + "reports/sentieon_dedup/test", + "reports/sentieon_dedup/test/test.consensus.cram.metrics", + "reports/sentieon_dedup/test/test.consensus.cram.metrics.multiqc.tsv" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,7aa9e9bbc9997fd415ec0bee7584aec2", + "mosdepth-cumcoverage-dist-id.txt:md5,d50c2592adebfaa41581caf56deddcf4", + "mosdepth_perchrom.txt:md5,7aa9e9bbc9997fd415ec0bee7584aec2", + "multiqc_citations.txt:md5,7d0b4b866fa577272c48a1f3ad72e75d", + "multiqc_dedup_metrics.txt:md5,39184426a647ce0e9a1c93082800baf8", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,b7a972304062dab47bf1ef592defa909", + "samtools_alignment_plot.txt:md5,5c19e1b5d6980e773e9a182557ae20f8", + "test.dedup.mosdepth.global.dist.txt:md5,cc38f46822141ac16b5025ac5d62d044", + "test.dedup.mosdepth.region.dist.txt:md5,86b78626070a52dbe36d952b85814921", + "test.dedup.mosdepth.summary.txt:md5,3cf3700ecd4ec08825ab33e8bcbc3206", + "test.dedup.regions.bed.gz:md5,ba570a03993dbc155da2e5bf8e42f62e", + "test.dedup.regions.bed.gz.csi:md5,10b7f8e9ac2ac27205f8bc827a186195", + "test.recal.mosdepth.global.dist.txt:md5,cc38f46822141ac16b5025ac5d62d044", + "test.recal.mosdepth.region.dist.txt:md5,86b78626070a52dbe36d952b85814921", + "test.recal.mosdepth.summary.txt:md5,3cf3700ecd4ec08825ab33e8bcbc3206", + "test.recal.regions.bed.gz:md5,ba570a03993dbc155da2e5bf8e42f62e", + "test.recal.regions.bed.gz.csi:md5,10b7f8e9ac2ac27205f8bc827a186195" + ], + "No BAM files", + [ + "test.recal.cram:md5,4b6c484e29620bcf71dd9d00ea7b8fa6", + "test.consensus.cram:md5,7b5a6380e09ad4da2a4b9a2e6bda2d1a" + ], + "No VCF files", + "No 
warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-08-15T09:38:15.648926052" + }, + "-profile test --tools sentieon_dedup --step markduplicates --input tests/csv/3.0/mapped_single_cram.csv": { + "content": [ + 13, + { + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SENTIEON_DEDUP": { + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_dedup_metrics.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + 
"multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/dedup_metrics.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/dedup_metrics.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + 
"multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/dedup_metrics.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "preprocessing/sentieon_dedup", + "preprocessing/sentieon_dedup/test", + "preprocessing/sentieon_dedup/test/test.dedup.cram", + "preprocessing/sentieon_dedup/test/test.dedup.cram.crai", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.dedup.mosdepth.global.dist.txt", 
+ "reports/mosdepth/test/test.dedup.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.dedup.mosdepth.summary.txt", + "reports/mosdepth/test/test.dedup.regions.bed.gz", + "reports/mosdepth/test/test.dedup.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.dedup.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/sentieon_dedup", + "reports/sentieon_dedup/test", + "reports/sentieon_dedup/test/test.dedup.cram.metrics", + "reports/sentieon_dedup/test/test.dedup.cram.metrics.multiqc.tsv" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,2a082d53dcf3a854f46d5af6c931d134", + "mosdepth-cumcoverage-dist-id.txt:md5,7e97a5d607e949529a7bc8e92c6c541e", + "mosdepth_perchrom.txt:md5,2a082d53dcf3a854f46d5af6c931d134", + "multiqc_citations.txt:md5,7d0b4b866fa577272c48a1f3ad72e75d", + "multiqc_dedup_metrics.txt:md5,4756b5e5f80faca763179470951ef0f3", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,b20f475e03ef81a68b3dec99b7e6aae4", + "samtools_alignment_plot.txt:md5,b693942009526869bd21cbc42d54f53c", + "test.dedup.mosdepth.global.dist.txt:md5,8e875e20e3fb9cf288d68c1d223f6fd5", + "test.dedup.mosdepth.region.dist.txt:md5,75e1ce7e55af51f4985fa91654a5ea2d", + "test.dedup.mosdepth.summary.txt:md5,b23cf96942b2ada3f41172a9349a1175", + "test.dedup.regions.bed.gz:md5,74cd0c779c7b3228adcf3b177333886a", + 
"test.dedup.regions.bed.gz.csi:md5,0dc011f3344841dc14aa488da905e917", + "test.recal.mosdepth.global.dist.txt:md5,8e875e20e3fb9cf288d68c1d223f6fd5", + "test.recal.mosdepth.region.dist.txt:md5,75e1ce7e55af51f4985fa91654a5ea2d", + "test.recal.mosdepth.summary.txt:md5,b23cf96942b2ada3f41172a9349a1175", + "test.recal.regions.bed.gz:md5,74cd0c779c7b3228adcf3b177333886a", + "test.recal.regions.bed.gz.csi:md5,0dc011f3344841dc14aa488da905e917" + ], + "No BAM files", + [ + "test.recal.cram:md5,463ac3b905fbf4ddf113a94dbfa8d69f", + "test.dedup.cram:md5,2f11e4fe3390b8ad0a1852616fd1da04" + ], + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-06-21T11:05:41.448189865" + }, + "-profile test --tools sentieon_dedup --step markduplicates --input tests/csv/3.0/bam_umi_header.csv --sentieon_consensus --umi_in_read_header": { + "content": [ + 14, + { + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FGBIO_COPYUMIFROMREADNAME": { + "fgbio": "2.4.0" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SENTIEON_DEDUP": { + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + 
"multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_dedup_metrics.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/dedup_metrics.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", 
+ "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/dedup_metrics.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/dedup_metrics.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + 
"pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "preprocessing/sentieon_consensus", + "preprocessing/sentieon_consensus/test", + "preprocessing/sentieon_consensus/test/test.consensus.cram", + "preprocessing/sentieon_consensus/test/test.consensus.cram.crai", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.dedup.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.dedup.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.dedup.mosdepth.summary.txt", + "reports/mosdepth/test/test.dedup.regions.bed.gz", + "reports/mosdepth/test/test.dedup.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.dedup.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/sentieon_dedup", + "reports/sentieon_dedup/test", + "reports/sentieon_dedup/test/test.consensus.cram.metrics", + "reports/sentieon_dedup/test/test.consensus.cram.metrics.multiqc.tsv" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,29c717bad585e63514e92b688cb8022a", + "mosdepth-cumcoverage-dist-id.txt:md5,6e9ebec35c2035b9cf63fdb1079ba133", + "mosdepth_perchrom.txt:md5,29c717bad585e63514e92b688cb8022a", + "multiqc_citations.txt:md5,7d0b4b866fa577272c48a1f3ad72e75d", + "multiqc_dedup_metrics.txt:md5,b2aea3c46773a3422dcf98ab6a8a31a1", + 
"picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,36c50f0e76e2cdb1476248b8bffa60f3", + "samtools_alignment_plot.txt:md5,511a3eba2589bb6790a4cfcd2a9c0d6c", + "test.dedup.mosdepth.global.dist.txt:md5,822c1f9c009f5436fefcfc9f5f7e6ee7", + "test.dedup.mosdepth.region.dist.txt:md5,1395b75e88a291268b96ec38f63c033e", + "test.dedup.mosdepth.summary.txt:md5,17f1b050f76ba0bfc96cbd045b627cd4", + "test.dedup.regions.bed.gz:md5,175dba70847fa0f2e5a44a014fa91027", + "test.dedup.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "test.recal.mosdepth.global.dist.txt:md5,822c1f9c009f5436fefcfc9f5f7e6ee7", + "test.recal.mosdepth.region.dist.txt:md5,1395b75e88a291268b96ec38f63c033e", + "test.recal.mosdepth.summary.txt:md5,17f1b050f76ba0bfc96cbd045b627cd4", + "test.recal.regions.bed.gz:md5,175dba70847fa0f2e5a44a014fa91027", + "test.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259" + ], + "No BAM files", + [ + "test.recal.cram:md5,aa729f5aa32977f1714c0ba24f3aa1eb", + "test.consensus.cram:md5,aa729f5aa32977f1714c0ba24f3aa1eb" + ], + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-08-15T10:02:22.395300103" + }, + "-profile test --tools sentieon_dedup --step markduplicates --input tests/csv/3.0/bam_umi_header.csv --umi_in_read_header": { + "content": [ + 14, + { + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FGBIO_COPYUMIFROMREADNAME": { + "fgbio": "2.4.0" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + 
"mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SENTIEON_DEDUP": { + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_dedup_metrics.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + 
"multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/dedup_metrics.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/dedup_metrics.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/dedup_metrics.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + 
"multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "preprocessing/sentieon_dedup", + "preprocessing/sentieon_dedup/test", + "preprocessing/sentieon_dedup/test/test.dedup.cram", + "preprocessing/sentieon_dedup/test/test.dedup.cram.crai", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.dedup.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.dedup.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.dedup.mosdepth.summary.txt", + "reports/mosdepth/test/test.dedup.regions.bed.gz", + "reports/mosdepth/test/test.dedup.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + 
"reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.dedup.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/sentieon_dedup", + "reports/sentieon_dedup/test", + "reports/sentieon_dedup/test/test.dedup.cram.metrics", + "reports/sentieon_dedup/test/test.dedup.cram.metrics.multiqc.tsv" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,400b54460251c0fa2d2bc79c77c37ee3", + "mosdepth-cumcoverage-dist-id.txt:md5,6b703211a2779819920b7d4b25bdc638", + "mosdepth_perchrom.txt:md5,400b54460251c0fa2d2bc79c77c37ee3", + "multiqc_citations.txt:md5,7d0b4b866fa577272c48a1f3ad72e75d", + "multiqc_dedup_metrics.txt:md5,b2aea3c46773a3422dcf98ab6a8a31a1", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,a416a2f4042ce91471192ee33bab23ad", + "samtools_alignment_plot.txt:md5,cf28e9513f09390047f12f904114a50c", + "test.dedup.mosdepth.global.dist.txt:md5,71d52337c3c971849645655e3203f9e0", + "test.dedup.mosdepth.region.dist.txt:md5,d985616d0cb8d74ff6448601d9a03318", + "test.dedup.mosdepth.summary.txt:md5,c01b82249d58f2f2c0d7473c588ffaf4", + "test.dedup.regions.bed.gz:md5,b49899dadd40a3b54b0c43033d740aa8", + "test.dedup.regions.bed.gz.csi:md5,5c556fe6462be13681267bb28257b654", + "test.recal.mosdepth.global.dist.txt:md5,71d52337c3c971849645655e3203f9e0", + "test.recal.mosdepth.region.dist.txt:md5,d985616d0cb8d74ff6448601d9a03318", + "test.recal.mosdepth.summary.txt:md5,c01b82249d58f2f2c0d7473c588ffaf4", + "test.recal.regions.bed.gz:md5,b49899dadd40a3b54b0c43033d740aa8", + "test.recal.regions.bed.gz.csi:md5,5c556fe6462be13681267bb28257b654" + ], + "No BAM files", + [ + "test.recal.cram:md5,2f93d26f230611260a7497a4913af443", + 
"test.dedup.cram:md5,2f93d26f230611260a7497a4913af443" + ], + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-08-15T09:50:20.120218465" + } +} diff --git a/tests/spark.nf.test b/tests/spark.nf.test new file mode 100644 index 0000000000..4b9c2bac68 --- /dev/null +++ b/tests/spark.nf.test @@ -0,0 +1,33 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + profile "test,spark" + + def test_scenario = [ + [ + name: "-profile test,spark --input tests/csv/3.0/fastq_tumor_only.csv --use_gatk_spark baserecalibrator,markduplicates", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_tumor_only.csv", + tools: null, + use_gatk_spark: 'baserecalibrator,markduplicates' + ] + ], + [ + name: "-profile test,spark --input tests/csv/3.0/fastq_tumor_only.csv --use_gatk_spark baserecalibrator,markduplicates --skip_tools fastqc,markduplicates_report,mosdepth,multiqc,samtools", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_tumor_only.csv", + tools: null, + use_gatk_spark: 'baserecalibrator,markduplicates', + skip_tools: 'fastqc,markduplicates_report,mosdepth,multiqc,samtools' + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/spark.nf.test.snap b/tests/spark.nf.test.snap new file mode 100644 index 0000000000..dce507def6 --- /dev/null +++ b/tests/spark.nf.test.snap @@ -0,0 +1,324 @@ +{ + "-profile test,spark --input tests/csv/3.0/fastq_tumor_only.csv --use_gatk_spark baserecalibrator,markduplicates": { + "content": [ + 18, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "GATK4SPARK_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4SPARK_BASERECALIBRATOR": 
{ + "gatk4": "4.6.1.0" + }, + "GATK4SPARK_MARKDUPLICATES": { + "gatk4": "4.6.1.0" + }, + "GATK4_ESTIMATELIBRARYCOMPLEXITY": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "INDEX_MARKDUPLICATES": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + 
"multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + 
"multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + 
"multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test2", + "preprocessing/markduplicates/test2/test2.md.cram", + "preprocessing/markduplicates/test2/test2.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test2", + "preprocessing/recal_table/test2/test2.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test2", + "preprocessing/recalibrated/test2/test2.recal.cram", + "preprocessing/recalibrated/test2/test2.recal.cram.crai", + "reference", + "reports", + "reports/fastqc", + "reports/fastqc/test2-test_L1", + "reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.html", + "reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.zip", + 
"reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.html", + "reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test2", + "reports/markduplicates/test2/test2.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test2", + "reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt", + "reports/mosdepth/test2/test2.md.mosdepth.region.dist.txt", + "reports/mosdepth/test2/test2.md.mosdepth.summary.txt", + "reports/mosdepth/test2/test2.md.regions.bed.gz", + "reports/mosdepth/test2/test2.md.regions.bed.gz.csi", + "reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test2/test2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test2/test2.recal.mosdepth.summary.txt", + "reports/mosdepth/test2/test2.recal.regions.bed.gz", + "reports/mosdepth/test2/test2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test2", + "reports/samtools/test2/test2.md.cram.stats", + "reports/samtools/test2/test2.recal.cram.stats" + ], + [ + "fastqc-status-check-heatmap.txt:md5,b82724a03a6ec93ee44fe855f34025d7", + "fastqc_adapter_content_plot.txt:md5,aacb772b9a0141acc4ab43287172d6f4", + "fastqc_per_base_n_content_plot.txt:md5,31bbffa022c118bc9a3ac18973b54775", + "fastqc_per_base_sequence_quality_plot.txt:md5,1ac8402bd8a31f6002ecf1882cc9639e", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,f57d340bbd9ca796a0b336bae53aa4d4", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,1aaa2f08d367f35108c469d6cdc72773", + "fastqc_per_sequence_quality_scores_plot.txt:md5,e56fb8c4dc6b366afd58be19058e7118", + "fastqc_sequence_counts_plot.txt:md5,e96ed3d75a347dfe3c36a4055ea5a7f6", + "fastqc_sequence_duplication_levels_plot.txt:md5,90fc887e4afd33c3a72200bd854de2a5", + "fastqc_sequence_length_distribution_plot.txt:md5,6373d807db4406a7c077df2262ec6e26", + "mosdepth-coverage-per-contig-single.txt:md5,3a7533e81978d33159d27b18cd337565", + 
"mosdepth-cumcoverage-dist-id.txt:md5,2f16aae68ba6e7c486f6fd9ae8c46c80", + "mosdepth_perchrom.txt:md5,3a7533e81978d33159d27b18cd337565", + "multiqc_citations.txt:md5,7d0b4b866fa577272c48a1f3ad72e75d", + "multiqc_fastqc.txt:md5,d54614f8b2de16511949dbbde8f38a5d", + "samtools-stats-dp.txt:md5,440993b962f6c557cccfa2974648e1ec", + "samtools_alignment_plot.txt:md5,46e8824724863d243a01cc239a5eff15", + "test2.md.mosdepth.global.dist.txt:md5,85d38a74ce189b9110c57cd94bc26757", + "test2.md.mosdepth.region.dist.txt:md5,286d57b7d9b3a95ef18ab2eb7f913d81", + "test2.md.mosdepth.summary.txt:md5,04b69ef7f00199dcea7822a79d2c7bd7", + "test2.md.regions.bed.gz:md5,292e177aa997597f83dc4e84bcc36b4c", + "test2.md.regions.bed.gz.csi:md5,5bf5fc178e4faf2462427502c3666004", + "test2.recal.mosdepth.global.dist.txt:md5,85d38a74ce189b9110c57cd94bc26757", + "test2.recal.mosdepth.region.dist.txt:md5,286d57b7d9b3a95ef18ab2eb7f913d81", + "test2.recal.mosdepth.summary.txt:md5,04b69ef7f00199dcea7822a79d2c7bd7", + "test2.recal.regions.bed.gz:md5,292e177aa997597f83dc4e84bcc36b4c", + "test2.recal.regions.bed.gz.csi:md5,5bf5fc178e4faf2462427502c3666004" + ], + "No BAM files", + [ + "test2.md.cram:md5,b186d05619be54618852b198c1b19198", + "test2.recal.cram:md5,c1eec6185153a1044bd296a1db065f7d" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998513#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T21:32:22.98324443" + }, + "-profile test,spark --input tests/csv/3.0/fastq_tumor_only.csv --use_gatk_spark baserecalibrator,markduplicates --skip_tools fastqc,markduplicates_report,mosdepth,multiqc,samtools": { + "content": [ + 11, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + 
"GATK4SPARK_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4SPARK_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4SPARK_MARKDUPLICATES": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "INDEX_MARKDUPLICATES": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test2", + "preprocessing/markduplicates/test2/test2.md.cram", + "preprocessing/markduplicates/test2/test2.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test2", + "preprocessing/recal_table/test2/test2.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test2", + "preprocessing/recalibrated/test2/test2.recal.cram", + "preprocessing/recalibrated/test2/test2.recal.cram.crai", + "reference" + ], + "No stable content", + "No BAM files", + [ + "test2.md.cram:md5,b186d05619be54618852b198c1b19198", + "test2.recal.cram:md5,c1eec6185153a1044bd296a1db065f7d" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998513#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T21:33:59.148271232" + } +} diff --git a/tests/start_from_markduplicates.nf.test b/tests/start_from_markduplicates.nf.test new file mode 100644 index 0000000000..037bd84cb6 --- /dev/null +++ b/tests/start_from_markduplicates.nf.test @@ -0,0 +1,52 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test 
pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --input tests/csv/3.0/mapped_single_bam.csv --step markduplicates --skip_tools markduplicates --tools null", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_bam.csv", + step: 'markduplicates', + skip_tools: "markduplicates", + tools: null + ] + ], + [ + name: "-profile test --input tests/csv/3.0/mapped_single_bam.csv --step markduplicates --tools null", + params: [ + modules_testdata_base_path: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/', + input: "${projectDir}/tests/csv/3.0/mapped_single_bam.csv", + step: 'markduplicates', + tools: null + ] + ], + [ + name: "-profile test --input tests/csv/3.0/mapped_single_cram.csv --step markduplicates --skip_tools markduplicates --tools null", + params: [ + modules_testdata_base_path: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/', + input: "${projectDir}/tests/csv/3.0/mapped_single_cram.csv", + step: 'markduplicates', + skip_tools: "markduplicates", + tools: null + ] + ], + [ + name: "-profile test --input tests/csv/3.0/mapped_single_cram.csv --step markduplicates --tools null", + params: [ + modules_testdata_base_path: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/', + input: "${projectDir}/tests/csv/3.0/mapped_single_cram.csv", + step: 'markduplicates', + tools: null + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/start_from_markduplicates.nf.test.snap b/tests/start_from_markduplicates.nf.test.snap new file mode 100644 index 0000000000..34aa8c3aea --- /dev/null +++ b/tests/start_from_markduplicates.nf.test.snap @@ -0,0 +1,666 @@ +{ + "-profile test --input tests/csv/3.0/mapped_single_bam.csv --step markduplicates --tools null": { + "content": [ + 13, + { + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_APPLYBQSR": { + "gatk4": 
"4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + 
"multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + 
"multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + 
"reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,7f0ae813d12ab461548ef47423186e1d", + "mosdepth-cumcoverage-dist-id.txt:md5,e6e8b8da4af8aef64278024fc25361dd", + "mosdepth_perchrom.txt:md5,7f0ae813d12ab461548ef47423186e1d", + "multiqc_citations.txt:md5,7d0b4b866fa577272c48a1f3ad72e75d", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,d75ab155c53a8469919bee1c9808eb4b", + "samtools_alignment_plot.txt:md5,7a727168e9bbb3f4c39894933438df98", + "test.md.mosdepth.global.dist.txt:md5,8e875e20e3fb9cf288d68c1d223f6fd5", + "test.md.mosdepth.region.dist.txt:md5,75e1ce7e55af51f4985fa91654a5ea2d", + "test.md.mosdepth.summary.txt:md5,b23cf96942b2ada3f41172a9349a1175", + "test.md.regions.bed.gz:md5,74cd0c779c7b3228adcf3b177333886a", + "test.md.regions.bed.gz.csi:md5,0dc011f3344841dc14aa488da905e917", + "test.recal.mosdepth.global.dist.txt:md5,8e875e20e3fb9cf288d68c1d223f6fd5", + "test.recal.mosdepth.region.dist.txt:md5,75e1ce7e55af51f4985fa91654a5ea2d", + "test.recal.mosdepth.summary.txt:md5,b23cf96942b2ada3f41172a9349a1175", + "test.recal.regions.bed.gz:md5,74cd0c779c7b3228adcf3b177333886a", + "test.recal.regions.bed.gz.csi:md5,0dc011f3344841dc14aa488da905e917" + ], + "No BAM files", + [ + "test.md.cram:md5,2f11e4fe3390b8ad0a1852616fd1da04", + "test.recal.cram:md5,463ac3b905fbf4ddf113a94dbfa8d69f" + ], + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + 
"timestamp": "2025-09-30T21:45:52.52962218" + }, + "-profile test --input tests/csv/3.0/mapped_single_cram.csv --step markduplicates --tools null": { + "content": [ + 13, + { + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + 
"multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + 
"multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + 
"reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,7f0ae813d12ab461548ef47423186e1d", + "mosdepth-cumcoverage-dist-id.txt:md5,e6e8b8da4af8aef64278024fc25361dd", + "mosdepth_perchrom.txt:md5,7f0ae813d12ab461548ef47423186e1d", + "multiqc_citations.txt:md5,7d0b4b866fa577272c48a1f3ad72e75d", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,d75ab155c53a8469919bee1c9808eb4b", + "samtools_alignment_plot.txt:md5,7a727168e9bbb3f4c39894933438df98", + "test.md.mosdepth.global.dist.txt:md5,8e875e20e3fb9cf288d68c1d223f6fd5", + "test.md.mosdepth.region.dist.txt:md5,75e1ce7e55af51f4985fa91654a5ea2d", + "test.md.mosdepth.summary.txt:md5,b23cf96942b2ada3f41172a9349a1175", + "test.md.regions.bed.gz:md5,74cd0c779c7b3228adcf3b177333886a", + "test.md.regions.bed.gz.csi:md5,0dc011f3344841dc14aa488da905e917", + "test.recal.mosdepth.global.dist.txt:md5,8e875e20e3fb9cf288d68c1d223f6fd5", + "test.recal.mosdepth.region.dist.txt:md5,75e1ce7e55af51f4985fa91654a5ea2d", + "test.recal.mosdepth.summary.txt:md5,b23cf96942b2ada3f41172a9349a1175", + "test.recal.regions.bed.gz:md5,74cd0c779c7b3228adcf3b177333886a", + "test.recal.regions.bed.gz.csi:md5,0dc011f3344841dc14aa488da905e917" + ], + "No BAM 
files", + [ + "test.md.cram:md5,2f11e4fe3390b8ad0a1852616fd1da04", + "test.recal.cram:md5,463ac3b905fbf4ddf113a94dbfa8d69f" + ], + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T21:48:22.238914133" + }, + "-profile test --input tests/csv/3.0/mapped_single_cram.csv --step markduplicates --skip_tools markduplicates --tools null": { + "content": [ + 12, + { + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", 
+ "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + 
"multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.sorted.mosdepth.summary.txt", + "reports/mosdepth/test/test.sorted.regions.bed.gz", + "reports/mosdepth/test/test.sorted.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/samtools/test/test.sorted.cram.stats" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,4153678ed78ce01defe11f5b9fb8deb4", + "mosdepth-cumcoverage-dist-id.txt:md5,fcecb7f7ed584a1395065568e0ec1259", + "mosdepth_perchrom.txt:md5,4153678ed78ce01defe11f5b9fb8deb4", + "multiqc_citations.txt:md5,7d0b4b866fa577272c48a1f3ad72e75d", + "samtools-stats-dp.txt:md5,dae2fa55a81049479a787e0fe5a64289", + "samtools_alignment_plot.txt:md5,b693942009526869bd21cbc42d54f53c", + 
"test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "test.sorted.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.sorted.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.sorted.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.sorted.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.sorted.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259" + ], + "No BAM files", + [ + "test.recal.cram:md5,463ac3b905fbf4ddf113a94dbfa8d69f" + ], + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T21:47:02.418026538" + }, + "-profile test --input tests/csv/3.0/mapped_single_bam.csv --step markduplicates --skip_tools markduplicates --tools null": { + "content": [ + 12, + { + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + 
"multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + 
"multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt", + 
"reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.sorted.mosdepth.summary.txt", + "reports/mosdepth/test/test.sorted.regions.bed.gz", + "reports/mosdepth/test/test.sorted.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/samtools/test/test.sorted.cram.stats" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,4153678ed78ce01defe11f5b9fb8deb4", + "mosdepth-cumcoverage-dist-id.txt:md5,fcecb7f7ed584a1395065568e0ec1259", + "mosdepth_perchrom.txt:md5,4153678ed78ce01defe11f5b9fb8deb4", + "multiqc_citations.txt:md5,7d0b4b866fa577272c48a1f3ad72e75d", + "samtools-stats-dp.txt:md5,dae2fa55a81049479a787e0fe5a64289", + "samtools_alignment_plot.txt:md5,b693942009526869bd21cbc42d54f53c", + "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "test.sorted.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.sorted.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.sorted.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.sorted.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.sorted.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259" + ], + "No BAM files", + [ + "test.recal.cram:md5,463ac3b905fbf4ddf113a94dbfa8d69f" + ], + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T21:44:33.781719225" + } +} diff --git a/tests/start_from_preparerecalibration.nf.test b/tests/start_from_preparerecalibration.nf.test new file mode 100644 index 0000000000..567690f78e --- /dev/null +++ 
b/tests/start_from_preparerecalibration.nf.test @@ -0,0 +1,50 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --input tests/csv/3.0/mapped_single_bam.csv --step prepare_recalibration --tools null", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_bam.csv", + step: 'prepare_recalibration', + tools: null + ] + ], + [ + name: "-profile test --input tests/csv/3.0/mapped_single_bam.csv --step prepare_recalibration --skip_tools baserecalibrator --tools strelka", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_bam.csv", + step: 'prepare_recalibration', + skip_tools: "baserecalibrator", + tools: 'strelka' + ] + ], + [ + name: "-profile test --input tests/csv/3.0/mapped_single_cram.csv --step prepare_recalibration --skip_tools baserecalibrator --tools strelka", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_cram.csv", + step: 'prepare_recalibration', + skip_tools: "baserecalibrator", + tools: 'strelka' + ] + ], + [ + name: "-profile test --input tests/csv/3.0/mapped_single_cram.csv --step prepare_recalibration --tools null", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_cram.csv", + step: 'prepare_recalibration', + tools: null + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/start_from_preparerecalibration.nf.test.snap b/tests/start_from_preparerecalibration.nf.test.snap new file mode 100644 index 0000000000..4f0ebc50d2 --- /dev/null +++ b/tests/start_from_preparerecalibration.nf.test.snap @@ -0,0 +1,538 @@ +{ + "-profile test --input tests/csv/3.0/mapped_single_bam.csv --step prepare_recalibration --tools null": { + "content": [ + 10, + { + "CREATE_INTERVALS_BED": { + 
"gawk": "5.3.0" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + 
"multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + 
"pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,115b9a93bcf511419e6622919172f321", + "mosdepth-cumcoverage-dist-id.txt:md5,df2fc825dfc41e30f72f5d125a6447ee", + "mosdepth_perchrom.txt:md5,115b9a93bcf511419e6622919172f321", + "multiqc_citations.txt:md5,7d0b4b866fa577272c48a1f3ad72e75d", + "samtools-stats-dp.txt:md5,c8deaa643c44fd800cbf14ac35fb7719", + "samtools_alignment_plot.txt:md5,8908b56b3040f2c53b15591adf93a266", + "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259" + ], + "No BAM files", + [ + "test.recal.cram:md5,463ac3b905fbf4ddf113a94dbfa8d69f" + ], + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T21:49:34.278947907" + }, + "-profile test --input tests/csv/3.0/mapped_single_cram.csv --step prepare_recalibration --skip_tools 
baserecalibrator --tools strelka": { + "content": [ + 10, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/strelka", + "reports/bcftools/strelka/test", + "reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt", + "reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/test", + 
"reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.count", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/test", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi" + ], + [ + "multiqc_citations.txt:md5,ac2b3cf2dfb12c40837b9bbad8112d86", + "test.strelka.variants.bcftools_stats.txt:md5,bffd4c0cf553a42c5b183220d71a1466", + "test.strelka.variants.FILTER.summary:md5,39ff2cc8eb7495a14a6b76e0ab627027", + "test.strelka.variants.TsTv.count:md5,ee7dafc8d941b8502a04a63dc3126fff" + ], + "No BAM files", + "No CRAM files", + [ + "test.strelka.genome.vcf.gz:md5,325ea84ccf1ec1cf7a6c5c4aae1c0d1", + "test.strelka.variants.vcf.gz:md5,3a3b3e67614ec91f4f0b88999dff454b" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T21:51:32.157567526" + }, + "-profile test --input tests/csv/3.0/mapped_single_bam.csv --step prepare_recalibration --skip_tools baserecalibrator --tools strelka": { + "content": [ + 10, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + 
"multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + 
"multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/strelka", + "reports/bcftools/strelka/test", + "reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt", + "reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/test", + "reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.count", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/test", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi" + ], + [ + "multiqc_citations.txt:md5,ac2b3cf2dfb12c40837b9bbad8112d86", + "test.strelka.variants.bcftools_stats.txt:md5,bffd4c0cf553a42c5b183220d71a1466", + 
"test.strelka.variants.FILTER.summary:md5,39ff2cc8eb7495a14a6b76e0ab627027", + "test.strelka.variants.TsTv.count:md5,ee7dafc8d941b8502a04a63dc3126fff" + ], + "No BAM files", + "No CRAM files", + [ + "test.strelka.genome.vcf.gz:md5,325ea84ccf1ec1cf7a6c5c4aae1c0d1", + "test.strelka.variants.vcf.gz:md5,3a3b3e67614ec91f4f0b88999dff454b" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T21:50:33.221939876" + }, + "-profile test --input tests/csv/3.0/mapped_single_cram.csv --step prepare_recalibration --tools null": { + "content": [ + 10, + { + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + 
"multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + 
"multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,115b9a93bcf511419e6622919172f321", + "mosdepth-cumcoverage-dist-id.txt:md5,df2fc825dfc41e30f72f5d125a6447ee", + "mosdepth_perchrom.txt:md5,115b9a93bcf511419e6622919172f321", + "multiqc_citations.txt:md5,7d0b4b866fa577272c48a1f3ad72e75d", + "samtools-stats-dp.txt:md5,c8deaa643c44fd800cbf14ac35fb7719", + "samtools_alignment_plot.txt:md5,8908b56b3040f2c53b15591adf93a266", + "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + 
"test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259" + ], + "No BAM files", + [ + "test.recal.cram:md5,463ac3b905fbf4ddf113a94dbfa8d69f" + ], + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T21:52:41.43434759" + } +} diff --git a/tests/start_from_recalibration.nf.test b/tests/start_from_recalibration.nf.test new file mode 100644 index 0000000000..0f953d5113 --- /dev/null +++ b/tests/start_from_recalibration.nf.test @@ -0,0 +1,52 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --input tests/csv/3.0/mapped_single_cram.csv --step recalibrate --tools null", + params: [ + + input: "${projectDir}/tests/csv/3.0/prepare_recalibration_single_cram.csv", + step: 'recalibrate', + tools: null + ] + ], + [ + name: "-profile test --input tests/csv/3.0/mapped_single_bam.csv --step recalibrate --skip_tools baserecalibrator --tools strelka", + params: [ + + input: "${projectDir}/tests/csv/3.0/prepare_recalibration_single_bam.csv", + step: 'recalibrate', + skip_tools: "baserecalibrator", + tools: 'strelka' + ] + ], + [ + name: "-profile test --input tests/csv/3.0/mapped_single_cram.csv --step recalibrate --skip_tools baserecalibrator --tools strelka", + params: [ + + input: "${projectDir}/tests/csv/3.0/prepare_recalibration_single_cram.csv", + step: 'recalibrate', + skip_tools: "baserecalibrator", + tools: 'strelka' + ] + ], + [ + name: "-profile test --input tests/csv/3.0/mapped_single_bam.csv --step recalibrate --tools null", + params: [ + + input: "${projectDir}/tests/csv/3.0/prepare_recalibration_single_bam.csv", + step: 'recalibrate', + tools: null 
+ ] + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/start_from_recalibration.nf.test.snap b/tests/start_from_recalibration.nf.test.snap new file mode 100644 index 0000000000..8f28705103 --- /dev/null +++ b/tests/start_from_recalibration.nf.test.snap @@ -0,0 +1,498 @@ +{ + "-profile test --input tests/csv/3.0/mapped_single_bam.csv --step recalibrate --tools null": { + "content": [ + 9, + { + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", 
+ "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,115b9a93bcf511419e6622919172f321", + 
"mosdepth-cumcoverage-dist-id.txt:md5,df2fc825dfc41e30f72f5d125a6447ee", + "mosdepth_perchrom.txt:md5,115b9a93bcf511419e6622919172f321", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "samtools-stats-dp.txt:md5,c8deaa643c44fd800cbf14ac35fb7719", + "samtools_alignment_plot.txt:md5,8908b56b3040f2c53b15591adf93a266", + "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259" + ], + "No BAM files", + [ + "test.recal.cram:md5,463ac3b905fbf4ddf113a94dbfa8d69f" + ], + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T21:56:40.285535719" + }, + "-profile test --input tests/csv/3.0/mapped_single_cram.csv --step recalibrate --skip_tools baserecalibrator --tools strelka": { + "content": [ + 10, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + 
"multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + 
"multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/strelka", + "reports/bcftools/strelka/test", + "reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt", + "reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/test", + "reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.count", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/test", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi" + ], + [ + "multiqc_citations.txt:md5,ac2b3cf2dfb12c40837b9bbad8112d86", + "test.strelka.variants.bcftools_stats.txt:md5,bffd4c0cf553a42c5b183220d71a1466", + "test.strelka.variants.FILTER.summary:md5,39ff2cc8eb7495a14a6b76e0ab627027", + "test.strelka.variants.TsTv.count:md5,ee7dafc8d941b8502a04a63dc3126fff" + ], + "No BAM files", + "No CRAM files", + [ + 
"test.strelka.genome.vcf.gz:md5,325ea84ccf1ec1cf7a6c5c4aae1c0d1", + "test.strelka.variants.vcf.gz:md5,3a3b3e67614ec91f4f0b88999dff454b" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T21:55:41.4762897" + }, + "-profile test --input tests/csv/3.0/mapped_single_bam.csv --step recalibrate --skip_tools baserecalibrator --tools strelka": { + "content": [ + 10, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + 
"pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/strelka", + "reports/bcftools/strelka/test", + "reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt", + "reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/test", + "reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.count", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/test", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi" + ], + [ + "multiqc_citations.txt:md5,ac2b3cf2dfb12c40837b9bbad8112d86", + "test.strelka.variants.bcftools_stats.txt:md5,bffd4c0cf553a42c5b183220d71a1466", + "test.strelka.variants.FILTER.summary:md5,39ff2cc8eb7495a14a6b76e0ab627027", + "test.strelka.variants.TsTv.count:md5,ee7dafc8d941b8502a04a63dc3126fff" + ], + "No BAM files", + "No CRAM files", + [ + "test.strelka.genome.vcf.gz:md5,325ea84ccf1ec1cf7a6c5c4aae1c0d1", + "test.strelka.variants.vcf.gz:md5,3a3b3e67614ec91f4f0b88999dff454b" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T21:54:41.033469111" + }, + "-profile test --input tests/csv/3.0/mapped_single_cram.csv --step recalibrate --tools null": { + "content": [ + 9, + { + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": 
"1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + 
"multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,115b9a93bcf511419e6622919172f321", + "mosdepth-cumcoverage-dist-id.txt:md5,df2fc825dfc41e30f72f5d125a6447ee", + "mosdepth_perchrom.txt:md5,115b9a93bcf511419e6622919172f321", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "samtools-stats-dp.txt:md5,c8deaa643c44fd800cbf14ac35fb7719", + "samtools_alignment_plot.txt:md5,8908b56b3040f2c53b15591adf93a266", + "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259" + ], + "No BAM files", + [ + "test.recal.cram:md5,463ac3b905fbf4ddf113a94dbfa8d69f" + ], + "No VCF files", 
+ "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T21:53:40.326206561" + } +} diff --git a/tests/subworkflows/local/annotate/main.nf b/tests/subworkflows/local/annotate/main.nf deleted file mode 100644 index a1138bd994..0000000000 --- a/tests/subworkflows/local/annotate/main.nf +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -// Don't overwrite global params.modules, create a copy instead and use that within the main script. -def modules = params.modules.clone() - -include { ANNOTATE } from '../../../../subworkflows/local/annotate' addParams( - annotation_cache: false, - bgziptabix_merge_vep_options: modules['bgziptabix_merge_vep'], - bgziptabix_snpeff_options: modules['bgziptabix_snpeff'], - bgziptabix_vep_options: modules['bgziptabix_vep'], - merge_vep_options: modules['merge_vep'], - snpeff_options: modules['snpeff'], - snpeff_tag: "${modules['snpeff'].tag_base}.WBcel235", - vep_options: modules['vep'], - vep_tag: "${modules['vep'].tag_base}.WBcel235" -) - -workflow test_annotate { - input = [[id: 'test'], - [file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)]] - - ANNOTATE( - input, - ["snpeff","vep","merge"], - "WBcel235.99", - [], - "WBcel235", - "caenorhabditis_elegans", - "104", - []) -} diff --git a/tests/subworkflows/local/annotate/test.yml b/tests/subworkflows/local/annotate/test.yml deleted file mode 100644 index a0f9caf99b..0000000000 --- a/tests/subworkflows/local/annotate/test.yml +++ /dev/null @@ -1,11 +0,0 @@ -- name: subworkflow annotate - command: nextflow run ./tests/subworkflows/local/annotate/ -entry test_annotate -c tests/config/nextflow.config - tags: - - annotate - files: - - path: output/annotation/test/test_snpEff.ann.gz - - path: output/annotation/test/test_snpEff.ann.gz.tbi - - path: output/annotation/test/test_snpEff_VEP.ann.gz - - path: output/annotation/test/test_snpEff_VEP.ann.gz.tbi - - path: 
output/annotation/test/test_VEP.ann.gz - - path: output/annotation/test/test_VEP.ann.gz.tbi diff --git a/tests/subworkflows/nf-core/markduplicates/main.nf b/tests/subworkflows/nf-core/markduplicates/main.nf deleted file mode 100644 index 2e61829e0a..0000000000 --- a/tests/subworkflows/nf-core/markduplicates/main.nf +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -// Don't overwrite global params.modules, create a copy instead and use that within the main script. -def modules = params.modules.clone() - -include { MARKDUPLICATES } from '../../../../subworkflows/nf-core/markduplicates' addParams( - markduplicates_options: modules['markduplicates'], - markduplicatesspark_options: modules['markduplicatesspark'] -) - -workflow test_markduplicates { - input = [[id: 'test'], - [file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)], - [file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)]] - - MARKDUPLICATES ( input, false, true ) -} diff --git a/tests/subworkflows/nf-core/markduplicates/test.yml b/tests/subworkflows/nf-core/markduplicates/test.yml deleted file mode 100644 index 427736fe2d..0000000000 --- a/tests/subworkflows/nf-core/markduplicates/test.yml +++ /dev/null @@ -1,8 +0,0 @@ -- name: subworkflow markduplicates - command: nextflow run ./tests/subworkflows/nf-core/markduplicates/ -entry test_markduplicates -c tests/config/nextflow.config - tags: - - markduplicates - - gatk4 - files: - - path: output/preprocessing/test/markduplicates/test.md.bam - - path: output/preprocessing/test/markduplicates/test.md.bam.bai diff --git a/tests/subworkflows/nf-core/snpeff_annotate/main.nf b/tests/subworkflows/nf-core/snpeff_annotate/main.nf deleted file mode 100644 index 56af76c5e6..0000000000 --- a/tests/subworkflows/nf-core/snpeff_annotate/main.nf +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -// Don't 
overwrite global params.modules, create a copy instead and use that within the main script. -def modules = params.modules.clone() - -include { SNPEFF_ANNOTATE } from '../../../../subworkflows/nf-core/snpeff_annotate' addParams( - bgziptabix_snpeff_options: modules['bgziptabix_snpeff'], - snpeff_options: modules['snpeff'], - snpeff_tag: "${modules['snpeff'].tag_base}.WBcel235", - use_cache: false -) - -workflow test_snpeff_annotate { - input = [[id: 'test'], - [file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)]] - - SNPEFF_ANNOTATE ( - input, - "WBcel235.99", - []) -} diff --git a/tests/subworkflows/nf-core/snpeff_annotate/test.yml b/tests/subworkflows/nf-core/snpeff_annotate/test.yml deleted file mode 100644 index 7d9c8c087c..0000000000 --- a/tests/subworkflows/nf-core/snpeff_annotate/test.yml +++ /dev/null @@ -1,7 +0,0 @@ -- name: subworkflow snpeff_annotate - command: nextflow run ./tests/subworkflows/nf-core/snpeff_annotate/ -entry test_snpeff_annotate -c tests/config/nextflow.config - tags: - - snpeff_annotate - files: - - path: output/annotation/test/test_snpEff.ann.gz - - path: output/annotation/test/test_snpEff.ann.gz.tbi \ No newline at end of file diff --git a/tests/subworkflows/nf-core/vep_annotate/main.nf b/tests/subworkflows/nf-core/vep_annotate/main.nf deleted file mode 100644 index 268180ef3c..0000000000 --- a/tests/subworkflows/nf-core/vep_annotate/main.nf +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -// Don't overwrite global params.modules, create a copy instead and use that within the main script. 
-def modules = params.modules.clone() - -include { VEP_ANNOTATE } from '../../../../subworkflows/nf-core/vep_annotate' addParams( - bgziptabix_vep_options: modules['bgziptabix_vep'], - use_cache: false, - vep_options: modules['vep'], - vep_tag: "${modules['vep'].tag_base}.WBcel235" -) - -workflow test_vep_annotate { - input = [[id: 'test'], - [file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)]] - - VEP_ANNOTATE ( - input, - "WBcel235", - "caenorhabditis_elegans", - "104", - []) -} diff --git a/tests/subworkflows/nf-core/vep_annotate/test.yml b/tests/subworkflows/nf-core/vep_annotate/test.yml deleted file mode 100644 index 35eb38830b..0000000000 --- a/tests/subworkflows/nf-core/vep_annotate/test.yml +++ /dev/null @@ -1,7 +0,0 @@ -- name: subworkflow vep_annotate - command: nextflow run ./tests/subworkflows/nf-core/vep_annotate/ -entry test_vep_annotate -c tests/config/nextflow.config - tags: - - vep_annotate - files: - - path: output/annotation/test/test_VEP.ann.gz - - path: output/annotation/test/test_VEP.ann.gz.tbi \ No newline at end of file diff --git a/tests/test_aligner.yml b/tests/test_aligner.yml deleted file mode 100644 index 1ced2a344b..0000000000 --- a/tests/test_aligner.yml +++ /dev/null @@ -1,24 +0,0 @@ -- name: Run bwa-mem2 - command: nextflow run main.nf -profile test,docker --aligner bwa-mem2 - tags: - - aligner - - bwa-mem2 - - preprocessing - files: - # - path: results/multiqc - - path: results/preprocessing/test/markduplicates/test.md.cram - - path: results/preprocessing/test/markduplicates/test.md.cram.crai - - path: results/preprocessing/test/recal_table/test.recal.table - - path: results/preprocessing/test/recalibrated/test.recal.cram - - path: results/preprocessing/test/recalibrated/test.recal.cram.crai - - path: results/preprocessing/csv/markduplicates.csv - - path: results/preprocessing/csv/markduplicates_test.csv - - path: results/preprocessing/csv/markduplicates_no_table.csv - - path: 
results/preprocessing/csv/markduplicates_no_table_test.csv - - path: results/preprocessing/csv/recalibrated.csv - - path: results/preprocessing/csv/recalibrated_test.csv - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal - - path: results/reports/samtools_stats/test/test.md.cram.stats - - path: results/reports/samtools_stats/test/test.recal.cram.stats diff --git a/tests/test_annotation.yml b/tests/test_annotation.yml deleted file mode 100644 index 282b4546bf..0000000000 --- a/tests/test_annotation.yml +++ /dev/null @@ -1,31 +0,0 @@ -- name: Run snpEff - command: nextflow run main.nf -profile test,annotation,docker --tools snpeff - tags: - - annotation - - snpeff - files: - - path: results/annotation/1234N/1234N_snpEff.ann.vcf.gz - - path: results/annotation/1234N/1234N_snpEff.ann.vcf.gz.tbi - # - path: results/multiqc -- name: Run VEP - command: nextflow run main.nf -profile test,annotation,docker --tools vep - tags: - - annotation - - vep - files: - - path: results/annotation/1234N/1234N_VEP.ann.vcf.gz - - path: results/annotation/1234N/1234N_VEP.ann.vcf.gz.tbi - # - path: results/multiqc -- name: Run snpEff followed by VEP - command: nextflow run main.nf -profile test,annotation,docker --tools merge - tags: - - annotation - - merge - - snpeff - - vep - files: - - path: results/annotation/1234N/1234N_snpEff.ann.vcf.gz - - path: results/annotation/1234N/1234N_snpEff.ann.vcf.gz.tbi - - path: results/annotation/1234N/1234N_snpEff_VEP.ann.vcf.gz - - path: results/annotation/1234N/1234N_snpEff_VEP.ann.vcf.gz.tbi - # - path: results/multiqc diff --git a/tests/test_default.yml b/tests/test_default.yml deleted file mode 100644 index 4af69d9531..0000000000 --- a/tests/test_default.yml +++ /dev/null @@ -1,23 +0,0 @@ -- name: Run default pipeline - command: nextflow run main.nf -profile test,docker - tags: - - default - - preprocessing - files: - # - path: results/multiqc - - 
path: results/preprocessing/test/markduplicates/test.md.cram - - path: results/preprocessing/test/markduplicates/test.md.cram.crai - - path: results/preprocessing/test/recal_table/test.recal.table - - path: results/preprocessing/test/recalibrated/test.recal.cram - - path: results/preprocessing/test/recalibrated/test.recal.cram.crai - - path: results/preprocessing/csv/markduplicates.csv - - path: results/preprocessing/csv/markduplicates_test.csv - - path: results/preprocessing/csv/markduplicates_no_table.csv - - path: results/preprocessing/csv/markduplicates_no_table_test.csv - - path: results/preprocessing/csv/recalibrated.csv - - path: results/preprocessing/csv/recalibrated_test.csv - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal - - path: results/reports/samtools_stats/test/test.md.cram.stats - - path: results/reports/samtools_stats/test/test.recal.cram.stats diff --git a/tests/test_gatk_spark.yml b/tests/test_gatk_spark.yml deleted file mode 100644 index d83149a954..0000000000 --- a/tests/test_gatk_spark.yml +++ /dev/null @@ -1,24 +0,0 @@ -- name: Run default pipeline with gatk_spark - command: nextflow run main.nf -profile test,use_gatk_spark,docker - tags: - - gatk4 - - gatk4_spark - - preprocessing - files: - # - path: results/multiqc - - path: results/preprocessing/test/markduplicates/test.md.cram - - path: results/preprocessing/test/markduplicates/test.md.cram.crai - - path: results/preprocessing/test/recal_table/test.recal.table - - path: results/preprocessing/test/recalibrated/test.recal.cram - - path: results/preprocessing/test/recalibrated/test.recal.cram.crai - - path: results/preprocessing/csv/markduplicates.csv - - path: results/preprocessing/csv/markduplicates_test.csv - - path: results/preprocessing/csv/markduplicates_no_table.csv - - path: results/preprocessing/csv/markduplicates_no_table_test.csv - - path: 
results/preprocessing/csv/recalibrated.csv - - path: results/preprocessing/csv/recalibrated_test.csv - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal - - path: results/reports/samtools_stats/test/test.md.cram.stats - - path: results/reports/samtools_stats/test/test.recal.cram.stats diff --git a/tests/test_pair.yml b/tests/test_pair.yml deleted file mode 100644 index 1020da8ba9..0000000000 --- a/tests/test_pair.yml +++ /dev/null @@ -1,36 +0,0 @@ -- name: Run default pipeline for tumor normal pair - command: nextflow run main.nf -profile test,pair,docker - tags: - - preprocessing - - tumor_normal_pair - files: - # - path: results/multiqc - - path: results/preprocessing/test/markduplicates/test.md.cram - - path: results/preprocessing/test/markduplicates/test.md.cram.crai - - path: results/preprocessing/test/recal_table/test.recal.table - - path: results/preprocessing/test/recalibrated/test.recal.cram - - path: results/preprocessing/test/recalibrated/test.recal.cram.crai - - path: results/preprocessing/test2/markduplicates/test2.md.cram - - path: results/preprocessing/test2/markduplicates/test2.md.cram.crai - - path: results/preprocessing/test2/recal_table/test2.recal.table - - path: results/preprocessing/test2/recalibrated/test2.recal.cram - - path: results/preprocessing/test2/recalibrated/test2.recal.cram.crai - - path: results/preprocessing/csv/markduplicates.csv - - path: results/preprocessing/csv/markduplicates_test.csv - - path: results/preprocessing/csv/markduplicates_test2.csv - - path: results/preprocessing/csv/markduplicates_no_table.csv - - path: results/preprocessing/csv/markduplicates_no_table_test.csv - - path: results/preprocessing/csv/markduplicates_no_table_test2.csv - - path: results/preprocessing/csv/recalibrated.csv - - path: results/preprocessing/csv/recalibrated_test.csv - - path: results/preprocessing/csv/recalibrated_test2.csv - - path: 
results/reports/fastqc/test-test_L1 - - path: results/reports/fastqc/test2-test2_L1 - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal - - path: results/reports/qualimap/test2/test2.mapped - - path: results/reports/qualimap/test2/test2.recal - - path: results/reports/samtools_stats/test/test.md.cram.stats - - path: results/reports/samtools_stats/test/test.recal.cram.stats - - path: results/reports/samtools_stats/test2/test2.md.cram.stats - - path: results/reports/samtools_stats/test2/test2.recal.cram.stats diff --git a/tests/test_prepare_recalibration.yml b/tests/test_prepare_recalibration.yml deleted file mode 100644 index 9c6221ddce..0000000000 --- a/tests/test_prepare_recalibration.yml +++ /dev/null @@ -1,22 +0,0 @@ -- name: Run Prepare_recal - command: nextflow run main.nf -profile test,prepare_recalibration,docker - tags: - - prepare_recalibration - - preprocessing - files: - # - path: results/multiqc - - path: results/preprocessing/test/markduplicates/test.md.cram - - path: results/preprocessing/test/markduplicates/test.md.cram.crai - - path: results/preprocessing/test/recal_table/test.recal.table - - path: results/preprocessing/test/recalibrated/test.recal.cram - - path: results/preprocessing/test/recalibrated/test.recal.cram.crai - - path: results/preprocessing/csv/markduplicates.csv - - path: results/preprocessing/csv/markduplicates_test.csv - - path: results/preprocessing/csv/markduplicates_no_table.csv - - path: results/preprocessing/csv/markduplicates_no_table_test.csv - - path: results/preprocessing/csv/recalibrated.csv - - path: results/preprocessing/csv/recalibrated_test.csv - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal - - path: results/reports/samtools_stats/test/test.md.cram.stats - - path: results/reports/samtools_stats/test/test.recal.cram.stats diff --git a/tests/test_save_bam_mapped.yml b/tests/test_save_bam_mapped.yml deleted file mode 
100644 index 2ff0bc16f9..0000000000 --- a/tests/test_save_bam_mapped.yml +++ /dev/null @@ -1,27 +0,0 @@ -- name: Run save_bam_mapped - command: nextflow run main.nf -profile test,docker --save_bam_mapped - tags: - - preprocessing - - save_bam_mapped - files: - # - path: results/multiqc - - path: results/preprocessing/test/mapped/test-test_L1.bam - - path: results/preprocessing/test/mapped/test-test_L1.bam.bai - - path: results/preprocessing/test/markduplicates/test.md.cram - - path: results/preprocessing/test/markduplicates/test.md.cram.crai - - path: results/preprocessing/test/recal_table/test.recal.table - - path: results/preprocessing/test/recalibrated/test.recal.cram - - path: results/preprocessing/test/recalibrated/test.recal.cram.crai - #- path: results/preprocessing/csv/mapped.csv - #- path: results/preprocessing/csv/mapped_test.csv - - path: results/preprocessing/csv/markduplicates.csv - - path: results/preprocessing/csv/markduplicates_test.csv - - path: results/preprocessing/csv/markduplicates_no_table.csv - - path: results/preprocessing/csv/markduplicates_no_table_test.csv - - path: results/preprocessing/csv/recalibrated.csv - - path: results/preprocessing/csv/recalibrated_test.csv - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal - - path: results/reports/samtools_stats/test/test.md.cram.stats - - path: results/reports/samtools_stats/test/test.recal.cram.stats diff --git a/tests/test_skip_markduplicates.yml b/tests/test_skip_markduplicates.yml deleted file mode 100644 index 5b8e5417e3..0000000000 --- a/tests/test_skip_markduplicates.yml +++ /dev/null @@ -1,50 +0,0 @@ -- name: Run default pipeline with skipping MarkDuplicates - command: nextflow run main.nf -profile test,docker,skip_markduplicates - tags: - - markduplicates - - preprocessing - - skip_markduplicates - files: - # - path: results/multiqc - - path: 
results/preprocessing/test/mapped/test-test_L1.bam - - path: results/preprocessing/test/mapped/test-test_L1.bam.bai - - path: results/preprocessing/test/recal_table/test.recal.table - - path: results/preprocessing/test/markduplicates/test.md.cram - - path: results/preprocessing/test/markduplicates/test.md.cram.crai - - path: results/preprocessing/test/recalibrated/test.recal.cram - - path: results/preprocessing/test/recalibrated/test.recal.cram.crai - - path: results/preprocessing/csv/markduplicates.csv - - path: results/preprocessing/csv/markduplicates_test.csv - - path: results/preprocessing/csv/markduplicates_no_table.csv - - path: results/preprocessing/csv/markduplicates_no_table_test.csv - - path: results/preprocessing/csv/recalibrated.csv - - path: results/preprocessing/csv/recalibrated_test.csv - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal - - path: results/reports/samtools_stats/test/test.md.cram.stats - - path: results/reports/samtools_stats/test/test.recal.cram.stats -- name: Run skip markduplicates preparerecal - command: nextflow run main.nf -profile test,docker,prepare_recalibration,skip_markduplicates - tags: - - markduplicates - - prepare_recalibration - - preprocessing - - skip_markduplicates - files: - # - path: results/multiqc - - path: results/preprocessing/test/recal_table/test.recal.table - - path: results/preprocessing/test/markduplicates/test.md.cram - - path: results/preprocessing/test/markduplicates/test.md.cram.crai - - path: results/preprocessing/test/recalibrated/test.recal.cram - - path: results/preprocessing/test/recalibrated/test.recal.cram.crai - - path: results/preprocessing/csv/markduplicates.csv - - path: results/preprocessing/csv/markduplicates_test.csv - - path: results/preprocessing/csv/markduplicates_no_table.csv - - path: results/preprocessing/csv/markduplicates_no_table_test.csv - - path: 
results/preprocessing/csv/recalibrated.csv - - path: results/preprocessing/csv/recalibrated_test.csv - #- path: results/reports/qualimap/test/test.mapped - #- path: results/reports/qualimap/test/test.recal - - path: results/reports/samtools_stats/test/test.md.cram.stats - - path: results/reports/samtools_stats/test/test.recal.cram.stats diff --git a/tests/test_split_fastq.yml b/tests/test_split_fastq.yml deleted file mode 100644 index 46bb5804c3..0000000000 --- a/tests/test_split_fastq.yml +++ /dev/null @@ -1,9 +0,0 @@ -- name: Run split fastq module - command: nextflow run main.nf -profile test,split_fastq,docker - tags: - - split_fastq - files: - - path: results/seqkit/test-test_L1/test_1.part_001.fastq.gz - - path: results/seqkit/test-test_L1/test_1.part_002.fastq.gz - - path: results/seqkit/test-test_L1/test_2.part_001.fastq.gz - - path: results/seqkit/test-test_L1/test_2.part_002.fastq.gz diff --git a/tests/test_targeted.yml b/tests/test_targeted.yml deleted file mode 100644 index 8980ed4fa2..0000000000 --- a/tests/test_targeted.yml +++ /dev/null @@ -1,23 +0,0 @@ -- name: Run default pipeline with target bed - command: nextflow run main.nf -profile test,targeted,docker - tags: - - preprocessing - - targeted - files: - # - path: results/multiqc - - path: results/preprocessing/test/markduplicates/test.md.cram - - path: results/preprocessing/test/markduplicates/test.md.cram.crai - - path: results/preprocessing/test/recal_table/test.recal.table - - path: results/preprocessing/test/recalibrated/test.recal.cram - - path: results/preprocessing/test/recalibrated/test.recal.cram.crai - - path: results/preprocessing/csv/markduplicates.csv - - path: results/preprocessing/csv/markduplicates_test.csv - - path: results/preprocessing/csv/markduplicates_no_table.csv - - path: results/preprocessing/csv/markduplicates_no_table_test.csv - - path: results/preprocessing/csv/recalibrated.csv - - path: results/preprocessing/csv/recalibrated_test.csv - - path: 
results/reports/fastqc/test-test_L1 - - path: results/reports/qualimap/test/test.mapped - - path: results/reports/qualimap/test/test.recal - - path: results/reports/samtools_stats/test/test.md.cram.stats - - path: results/reports/samtools_stats/test/test.recal.cram.stats diff --git a/tests/test_tools_manually.yml b/tests/test_tools_manually.yml new file mode 100644 index 0000000000..b7525103f5 --- /dev/null +++ b/tests/test_tools_manually.yml @@ -0,0 +1,50 @@ +- name: Run variant calling on somatic samples with ascat + command: nextflow run main.nf -profile test,tools_somatic_ascat --outdir results + tags: + - ascat_manual + - manual + - somatic + - variant_calling + files: + - path: results/variant_calling/ascat/sample4_vs_sample3/sample4_vs_sample3.tumour_normalBAF.txt + md5sum: b73a38fd183143b1e8aed9f261a9c5f0 + - path: results/variant_calling/ascat/sample4_vs_sample3/sample4_vs_sample3.tumour_tumourLogR.txt + md5sum: 29f29092c19274aa3d5fd4f9e3828cbb + - path: results/variant_calling/ascat/sample4_vs_sample3/sample4_vs_sample3.after_correction_gc_rt.sample4_vs_sample3.tumour.tumour.png + md5sum: df246ef9c2c4dc868901afe17366e116 + - path: results/variant_calling/ascat/sample4_vs_sample3/sample4_vs_sample3.before_correction.sample4_vs_sample3.tumour.tumour.png + md5sum: 7629826e2e02ab99bedbab75b0c022a5 + - path: results/variant_calling/ascat/sample4_vs_sample3/sample4_vs_sample3.cnvs.txt + md5sum: 68b329da9893e34099c7d8ad5cb9c940 + - path: results/variant_calling/ascat/sample4_vs_sample3/sample4_vs_sample3.tumour_tumourBAF.txt + md5sum: 5235f69624ab91f395ebf30b90c02e9e + - path: results/variant_calling/ascat/sample4_vs_sample3/sample4_vs_sample3.tumour.ASPCF.png + md5sum: 883075c53513dea8bbcb85ad564cb641 + - path: results/variant_calling/ascat/sample4_vs_sample3/sample4_vs_sample3.after_correction_gc_rt.sample4_vs_sample3.tumour.germline.png + md5sum: 1a47cc241548fa89f914c2f5bfee6bee + - path: 
results/variant_calling/ascat/sample4_vs_sample3/sample4_vs_sample3.metrics.txt + md5sum: f7e486e5eed6166dedf9306235f537ec + - path: results/variant_calling/ascat/sample4_vs_sample3/sample4_vs_sample3.before_correction.sample4_vs_sample3.tumour.germline.png + md5sum: 1a47cc241548fa89f914c2f5bfee6bee + - path: results/variant_calling/ascat/sample4_vs_sample3/sample4_vs_sample3.purityploidy.txt + md5sum: f1484c2b120834d3db8774ad02a038b9 + - path: results/variant_calling/ascat/sample4_vs_sample3/sample4_vs_sample3.tumour.sunrise.png + md5sum: a2bf4b04176983a87b85843c789eaab8 + - path: results/variant_calling/ascat/sample4_vs_sample3/sample4_vs_sample3.segments.txt + md5sum: 68b329da9893e34099c7d8ad5cb9c940 + - path: results/variant_calling/ascat/sample4_vs_sample3/sample4_vs_sample3.tumour_normalLogR.txt + md5sum: 05418a7d814db11808172a4f57d040a1 + - path: results/reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/sample3/sample3.recal.regions.bed.gz + - path: results/reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi + - path: results/reports/samtools/sample3/sample3.recal.cram.stats + # conda changes md5sums for test + - path: results/reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt + - path: results/reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt + - path: results/reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt + - path: results/reports/mosdepth/sample4/sample4.recal.regions.bed.gz + - path: results/reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi + - path: results/reports/samtools/sample4/sample4.recal.cram.stats + # conda changes md5sums for test diff --git a/tests/test_umi.yml b/tests/test_umi.yml deleted file mode 100644 index 3d664ec31f..0000000000 --- a/tests/test_umi.yml +++ /dev/null @@ -1,94 
+0,0 @@ -- name: Run UMI Quiaseq test - command: nextflow run main.nf -profile test,umi_quiaseq,docker - tags: - - umi_quiaseq - - umi - files: - - path: results/fastqtobam/1234N-SRR7545951_umi_converted.bam - md5sum: 1473c98be04aff10f80e2946b111c459 - - path: results/bam2fastq/1234N-SRR7545951_interleaved.fq.gz - md5sum: 17c0cb479e8117016e9f0d835d4df50f - - path: results/callumiconsensus/1234N-SRR7545951_umi-consensus.bam - md5sum: 3be1adf27d473de12cab895423a5f3ed - - path: results/cat/1234N-SRR7545951_1.merged.fastq.gz - md5sum: c1ed636a11b3031c72f3d4c84a7a0949 - - path: results/cat/1234N-SRR7545951_2.merged.fastq.gz - md5sum: 0bbef631b8de80f2abe441de713cbb49 - - path: results/groupreadsbyumi/1234N-SRR7545951_umi-grouped.bam - md5sum: e3bb348056513c46d7a60df4d7a8cc97 - - path: results/groupreadsbyumi/1234N-SRR7545951_umi_histogram.txt - md5sum: 2e1e9a774135675a3d1a3b0a88d29c53 - - path: results/samblaster/1234N-SRR7545951_unsorted_tagged.bam - md5sum: daa51939768c256315ce700dabaccad6 - - path: results/samtools/1234N-SRR7545951.map_map.bam - md5sum: 741de784bb83815fab8b2d23d88c74d7 - - path: results/samtools/1234N-SRR7545951.map_unmap.bam - md5sum: 31627b2d163da372584bb78258c046e5 - - path: results/samtools/1234N-SRR7545951.mapped_1.fq.gz - md5sum: 709872fc2910431b1e8b7074bfe38c67 - - path: results/samtools/1234N-SRR7545951.mapped_2.fq.gz - md5sum: 709872fc2910431b1e8b7074bfe38c67 - - path: results/samtools/1234N-SRR7545951.mapped_other.fq.gz - md5sum: 709872fc2910431b1e8b7074bfe38c67 - - path: results/samtools/1234N-SRR7545951.mapped_singleton.fq.gz - md5sum: 709872fc2910431b1e8b7074bfe38c67 - - path: results/samtools/1234N-SRR7545951.unmap_map.bam - md5sum: 42fb9dbb5a157c194c77b1de5b4f3b39 - - path: results/samtools/1234N-SRR7545951.unmap_unmap.bam - md5sum: 2075e48cd007f29c8f6fdc518e587e27 - - path: results/samtools/1234N-SRR7545951.unmapped_1.fq.gz - md5sum: 1f95ae5bbc7345372005b6673884f853 - - path: results/samtools/1234N-SRR7545951.unmapped_2.fq.gz - md5sum: 
9902e27a99c2a628d184cb91759d6ff7 - - path: results/samtools/1234N-SRR7545951.unmapped_other.fq.gz - md5sum: 709872fc2910431b1e8b7074bfe38c67 - - path: results/samtools/1234N-SRR7545951.unmapped_singleton.fq.gz - md5sum: 709872fc2910431b1e8b7074bfe38c67 - - -- name: Run UMI TSO test - command: nextflow run main.nf -profile test,umi_tso,docker - tags: - - umi_tso - - umi - files: - - path: results/fastqtobam/1234N-HT1080_umi_converted.bam - md5sum: aaf4b9a80f4b780d93820df8b536c531 - - path: results/bam2fastq/1234N-HT1080_interleaved.fq.gz - md5sum: b037fe8e180e805226485d9bea4a69e5 - - path: results/callumiconsensus/1234N-HT1080_umi-consensus.bam - md5sum: 807bfe6a82eabdf29aaf8bc6a832f0cd - - path: results/cat/1234N-HT1080_1.merged.fastq.gz - md5sum: c8f0d1308df4c46c0716ccfe76f78fb4 - - path: results/cat/1234N-HT1080_2.merged.fastq.gz - md5sum: 1b42767b2d5dbc99e447c41d6d2418ab - - path: results/groupreadsbyumi/1234N-HT1080_umi-grouped.bam - md5sum: c9c76b410ba208fea76c79cecfb45241 - - path: results/groupreadsbyumi/1234N-HT1080_umi_histogram.txt - md5sum: 3dec92ae49763c10c0b2e79827d25ffb - - path: results/samblaster/1234N-HT1080_unsorted_tagged.bam - md5sum: 628bd970fa00615fb9f841bad7c8f597 - - path: results/samtools/1234N-HT1080.map_map.bam - md5sum: 2689e640c3ededf70228a44ee4b1de48 - - path: results/samtools/1234N-HT1080.map_unmap.bam - md5sum: 37300fea69f7c400220a03ca5854d04b - - path: results/samtools/1234N-HT1080.mapped_1.fq.gz - md5sum: 709872fc2910431b1e8b7074bfe38c67 - - path: results/samtools/1234N-HT1080.mapped_2.fq.gz - md5sum: 709872fc2910431b1e8b7074bfe38c67 - - path: results/samtools/1234N-HT1080.mapped_other.fq.gz - md5sum: 709872fc2910431b1e8b7074bfe38c67 - - path: results/samtools/1234N-HT1080.mapped_singleton.fq.gz - md5sum: 709872fc2910431b1e8b7074bfe38c67 - - path: results/samtools/1234N-HT1080.unmap_map.bam - md5sum: 6267d0ed8274e1fa7aad8d1e71d98696 - - path: results/samtools/1234N-HT1080.unmap_unmap.bam - md5sum: 32ed07aa23900f5722e5983a646a5ac2 - 
- path: results/samtools/1234N-HT1080.unmapped_1.fq.gz - md5sum: d9a7a279c05563a37d9a5a6938be68fb - - path: results/samtools/1234N-HT1080.unmapped_2.fq.gz - md5sum: afe191dcd48360f96bdb583fa60ff400 - - path: results/samtools/1234N-HT1080.unmapped_other.fq.gz - md5sum: 709872fc2910431b1e8b7074bfe38c67 - - path: results/samtools/1234N-HT1080.unmapped_singleton.fq.gz - md5sum: 709872fc2910431b1e8b7074bfe38c67 diff --git a/tests/tumor-normal-pair.nf.test b/tests/tumor-normal-pair.nf.test new file mode 100644 index 0000000000..fa70bcc06e --- /dev/null +++ b/tests/tumor-normal-pair.nf.test @@ -0,0 +1,20 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --input tests/csv/3.0/fastq_pair.csv", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_pair.csv" + ] + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/tumor-normal-pair.nf.test.snap b/tests/tumor-normal-pair.nf.test.snap new file mode 100644 index 0000000000..69c4d6a486 --- /dev/null +++ b/tests/tumor-normal-pair.nf.test.snap @@ -0,0 +1,363 @@ +{ + "-profile test --input tests/csv/3.0/fastq_pair.csv": { + "content": [ + 40, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "STRELKA_SOMATIC": { + 
"strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + 
"multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + 
"multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + 
"preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/markduplicates/test2", + "preprocessing/markduplicates/test2/test2.md.cram", + "preprocessing/markduplicates/test2/test2.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recal_table/test2", + "preprocessing/recal_table/test2/test2.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "preprocessing/recalibrated/test2", + "preprocessing/recalibrated/test2/test2.recal.cram", + "preprocessing/recalibrated/test2/test2.recal.cram.crai", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/strelka", + "reports/bcftools/strelka/test", + "reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt", + "reports/bcftools/strelka/test2_vs_test", + "reports/bcftools/strelka/test2_vs_test/test2_vs_test.strelka.somatic_indels.bcftools_stats.txt", + "reports/bcftools/strelka/test2_vs_test/test2_vs_test.strelka.somatic_snvs.bcftools_stats.txt", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test2-test_L1", + "reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.html", + "reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.zip", + "reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.html", + "reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + 
"reports/markduplicates/test2", + "reports/markduplicates/test2/test2.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/mosdepth/test2", + "reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt", + "reports/mosdepth/test2/test2.md.mosdepth.region.dist.txt", + "reports/mosdepth/test2/test2.md.mosdepth.summary.txt", + "reports/mosdepth/test2/test2.md.regions.bed.gz", + "reports/mosdepth/test2/test2.md.regions.bed.gz.csi", + "reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test2/test2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test2/test2.recal.mosdepth.summary.txt", + "reports/mosdepth/test2/test2.recal.regions.bed.gz", + "reports/mosdepth/test2/test2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/samtools/test2", + "reports/samtools/test2/test2.md.cram.stats", + "reports/samtools/test2/test2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/test", + "reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.count", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/test2_vs_test", + 
"reports/vcftools/strelka/test2_vs_test/test2_vs_test.strelka.somatic_indels.FILTER.summary", + "reports/vcftools/strelka/test2_vs_test/test2_vs_test.strelka.somatic_indels.TsTv.count", + "reports/vcftools/strelka/test2_vs_test/test2_vs_test.strelka.somatic_indels.TsTv.qual", + "reports/vcftools/strelka/test2_vs_test/test2_vs_test.strelka.somatic_snvs.FILTER.summary", + "reports/vcftools/strelka/test2_vs_test/test2_vs_test.strelka.somatic_snvs.TsTv.count", + "reports/vcftools/strelka/test2_vs_test/test2_vs_test.strelka.somatic_snvs.TsTv.qual", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/test", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi", + "variant_calling/strelka/test2_vs_test", + "variant_calling/strelka/test2_vs_test/test2_vs_test.strelka.somatic_indels.vcf.gz", + "variant_calling/strelka/test2_vs_test/test2_vs_test.strelka.somatic_indels.vcf.gz.tbi", + "variant_calling/strelka/test2_vs_test/test2_vs_test.strelka.somatic_snvs.vcf.gz", + "variant_calling/strelka/test2_vs_test/test2_vs_test.strelka.somatic_snvs.vcf.gz.tbi" + ], + [ + "fastqc-status-check-heatmap.txt:md5,eeb4e7e7a45f4223c86bfe3aea81f90b", + "fastqc_adapter_content_plot.txt:md5,cc7a809f9f001c10646ee4199ccdb40f", + "fastqc_per_base_n_content_plot.txt:md5,1eba855ae0fa5b5ed4a1f90d1c97f759", + "fastqc_per_base_sequence_quality_plot.txt:md5,cbb2743dfb2ec74e72b578c83ec28ee8", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,73c884822eba0bafcdf34b90fe81aec5", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,24eeb00e5e2b11c7ab90a3223d429d15", + "fastqc_per_sequence_quality_scores_plot.txt:md5,6f048594f02effb93608665be29bd35a", + "fastqc_sequence_counts_plot.txt:md5,fca7ee9ef3382e2837a302d8c5d33769", + 
"fastqc_sequence_duplication_levels_plot.txt:md5,2aa0c6f33e4cffbb29cdabe2c28bb097", + "fastqc_sequence_length_distribution_plot.txt:md5,61b1fe978a2c73b86c30c27ee4bc60ae", + "mosdepth-coverage-per-contig-single.txt:md5,f73b2f18633bb4b386b4f9e9fcdffb6a", + "mosdepth-cumcoverage-dist-id.txt:md5,954642b30618e908300aa9ac1673985e", + "mosdepth_perchrom.txt:md5,f73b2f18633bb4b386b4f9e9fcdffb6a", + "multiqc_citations.txt:md5,790a7694da30294b51fa1d6c3eb8ba2b", + "multiqc_fastqc.txt:md5,2fd25e8c81f962594b801d5a9df3cd87", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,13dab249f7bef935dfef234299550db8", + "samtools_alignment_plot.txt:md5,44149e0c5cc4bfa58242824b300219a2", + "test.strelka.variants.bcftools_stats.txt:md5,2613827870dd789fe602a8a3b739b7f2", + "test2_vs_test.strelka.somatic_indels.bcftools_stats.txt:md5,5e8f9a8fdbc765ced736d0c8c7dd3a52", + "test2_vs_test.strelka.somatic_snvs.bcftools_stats.txt:md5,edb7763fad7b6f825e47e01ffa70adbc", + "test.md.mosdepth.global.dist.txt:md5,76fa71922a3f748e507c2364c531dfcb", + "test.md.mosdepth.region.dist.txt:md5,abc5df85e302b79985627888870882da", + "test.md.mosdepth.summary.txt:md5,d536456436eb275159b8c6af83213d80", + "test.md.regions.bed.gz:md5,b25a2798061021c0b2f4e1d18219bbbd", + "test.md.regions.bed.gz.csi:md5,b1c2a861f64e20a94108a6de3b76c582", + "test.recal.mosdepth.global.dist.txt:md5,76fa71922a3f748e507c2364c531dfcb", + "test.recal.mosdepth.region.dist.txt:md5,abc5df85e302b79985627888870882da", + "test.recal.mosdepth.summary.txt:md5,d536456436eb275159b8c6af83213d80", + "test.recal.regions.bed.gz:md5,b25a2798061021c0b2f4e1d18219bbbd", + "test.recal.regions.bed.gz.csi:md5,b1c2a861f64e20a94108a6de3b76c582", + 
"test2.md.mosdepth.global.dist.txt:md5,2020cf6dfc7ddca020c921dd9f0549b7", + "test2.md.mosdepth.region.dist.txt:md5,38ff8b38c33b9231f047fea8ea830aae", + "test2.md.mosdepth.summary.txt:md5,8b991358768cade225470a07cd34f573", + "test2.md.regions.bed.gz:md5,08e767f91a0a8d82733f0040e804a85f", + "test2.md.regions.bed.gz.csi:md5,d5f1c9389ecf52ba839e834780a94549", + "test2.recal.mosdepth.global.dist.txt:md5,2020cf6dfc7ddca020c921dd9f0549b7", + "test2.recal.mosdepth.region.dist.txt:md5,38ff8b38c33b9231f047fea8ea830aae", + "test2.recal.mosdepth.summary.txt:md5,8b991358768cade225470a07cd34f573", + "test2.recal.regions.bed.gz:md5,08e767f91a0a8d82733f0040e804a85f", + "test2.recal.regions.bed.gz.csi:md5,d5f1c9389ecf52ba839e834780a94549", + "test.strelka.variants.FILTER.summary:md5,dd87f507da7de20d5318841af312493b", + "test.strelka.variants.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + "test2_vs_test.strelka.somatic_indels.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "test2_vs_test.strelka.somatic_indels.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f", + "test2_vs_test.strelka.somatic_snvs.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "test2_vs_test.strelka.somatic_snvs.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f" + ], + "No BAM files", + [ + "test.md.cram:md5,59ecc5c82c7af1283eea7507c590c831", + "test2.md.cram:md5,bac87cf9290577fd9a4def63e046031f", + "test.recal.cram:md5,654909615a48db30bdc14ec4d9d7d17c", + "test2.recal.cram:md5,f4205ab086600ba2927e1468dc732976" + ], + [ + "test.strelka.genome.vcf.gz:md5,16437a040679d88b7d84a9276f793d6c", + "test.strelka.variants.vcf.gz:md5,666f835fdaf4952a179cdedd40c9d565", + "test2_vs_test.strelka.somatic_indels.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "test2_vs_test.strelka.somatic_snvs.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998513#1/1", 
+ "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T21:48:26.834713902" + } +} diff --git a/tests/umi_fastp.nf.test b/tests/umi_fastp.nf.test new file mode 100644 index 0000000000..e615bcfcff --- /dev/null +++ b/tests/umi_fastp.nf.test @@ -0,0 +1,25 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --input tests/csv/3.0/fastq_umi.csv --umi_length 7 --umi_location read2 --umi_base_skip 1 --tools null", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_umi.csv", + tools: null, + umi_length: 7, + umi_base_skip: 1, + umi_location: 'read2' + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/umi_fastp.nf.test.snap b/tests/umi_fastp.nf.test.snap new file mode 100644 index 0000000000..175e8235e2 --- /dev/null +++ b/tests/umi_fastp.nf.test.snap @@ -0,0 +1,352 @@ +{ + "-profile test --input tests/csv/3.0/fastq_umi.csv --umi_length 7 --umi_location read2 --umi_base_skip 1 --tools null": { + "content": [ + 18, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTP": { + "fastp": "0.24.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FGBIO_COPYUMIFROMREADNAME": { + "fgbio": "2.4.0" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + 
}, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/fastp-insert-size-plot.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_2_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_2_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_2_Before_filtering.txt", + "multiqc/multiqc_data/fastp_filtered_reads_plot.txt", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_overrepresented_sequences_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", 
+ "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastp.txt", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/fastp-insert-size-plot.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_1_After_filtering.pdf", + 
"multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_2_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_1_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_2_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_1_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_2_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp_filtered_reads_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastp_filtered_reads_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_overrepresented_sequences_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_top_overrepresented_sequences_table.pdf", + 
"multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/fastp-insert-size-plot.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_1_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_2_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_1_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_2_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_1_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_2_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp_filtered_reads_plot-cnt.png", + 
"multiqc/multiqc_plots/png/fastp_filtered_reads_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_overrepresented_sequences_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/fastqc_top_overrepresented_sequences_table.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/fastp-insert-size-plot.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_1_After_filtering.svg", + 
"multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_2_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_1_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_2_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_1_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_2_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp_filtered_reads_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastp_filtered_reads_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_overrepresented_sequences_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_top_overrepresented_sequences_table.svg", + 
"multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/fastp", + "reports/fastp/test", + "reports/fastp/test/test-test_L1.fastp.html", + "reports/fastp/test/test-test_L1.fastp.json", + "reports/fastp/test/test-test_L1.fastp.log", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + 
"reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats" + ], + [ + "fastp-insert-size-plot.txt:md5,a31a6d72c93d26449f973f48288d230f", + "fastp-seq-content-gc-plot_Read_1_After_filtering.txt:md5,f547f8ef1c002e492f4d03c481018484", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.txt:md5,1122fe435b23d57590c05b345101b016", + "fastp-seq-content-gc-plot_Read_2_After_filtering.txt:md5,d2df4ebba4bf30e4d5e868cdd365ed7c", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.txt:md5,ef89eeb5e4986749ee7e239a5b8626db", + "fastp-seq-content-n-plot_Read_1_After_filtering.txt:md5,b3fd803823ccc521ae42582c03e781bc", + "fastp-seq-content-n-plot_Read_1_Before_filtering.txt:md5,6ebe51b46d79a69ac4e06600fbf358cb", + "fastp-seq-content-n-plot_Read_2_After_filtering.txt:md5,67ea1452f4a48533bee308fd63223663", + "fastp-seq-content-n-plot_Read_2_Before_filtering.txt:md5,a86d6296f572da9d31b8f588dc95e501", + "fastp-seq-quality-plot_Read_1_After_filtering.txt:md5,3a8a9ded8ba4783bfd4dbc386fc043e9", + "fastp-seq-quality-plot_Read_1_Before_filtering.txt:md5,897622b2900d1afc9564d7caf0d94ef9", + "fastp-seq-quality-plot_Read_2_After_filtering.txt:md5,ddba628c779352500766d8c6a5613907", + 
"fastp-seq-quality-plot_Read_2_Before_filtering.txt:md5,ac86d6c0a9300fc9fd785e22cc22f705", + "fastp_filtered_reads_plot.txt:md5,32d24a0b0a2040651fab375645eb6260", + "fastqc-status-check-heatmap.txt:md5,92fcedba625d8394492d1ef6e2eb64f0", + "fastqc_adapter_content_plot.txt:md5,2a569bfd358aced71c1abf9f6c98e077", + "fastqc_overrepresented_sequences_plot.txt:md5,df45a06e934f25d5c81b8de3af859955", + "fastqc_per_base_n_content_plot.txt:md5,9beb9c220bb0314bd5365912584958c5", + "fastqc_per_base_sequence_quality_plot.txt:md5,69ca0488c8687d38e5db551c49fd9dce", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,823b5a69dfbae061cd2eabba777cdbbf", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,c01f53f345f1ed4e621f2ad5309786c6", + "fastqc_per_sequence_quality_scores_plot.txt:md5,74de77d7e26e0a7d84ffcadcc82958d5", + "fastqc_sequence_counts_plot.txt:md5,1a4aab87f1bd4fd9f69cabef40f5e5ed", + "fastqc_sequence_duplication_levels_plot.txt:md5,72e6366160d000a2990b2bbd321300a7", + "fastqc_sequence_length_distribution_plot.txt:md5,7c07faf0c0b90613cdc82a9f09bc1f19", + "mosdepth-coverage-per-contig-single.txt:md5,45bca136491a7675ebba87def2534f26", + "mosdepth-cumcoverage-dist-id.txt:md5,5e049e05573f1afd0bb893f2cb4076c6", + "mosdepth_perchrom.txt:md5,45bca136491a7675ebba87def2534f26", + "multiqc_citations.txt:md5,0e2971e7a873c92592112775fa99fb02", + "multiqc_fastp.txt:md5,8e11d751e997877e4ffc487c52756f62", + "multiqc_fastqc.txt:md5,0ca2cba4204d9076a1eb17596379d10c", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,a6f11f838a72299e4913a54dd327f2e6", + "samtools_alignment_plot.txt:md5,111d8b457e8c34dc919a527f12f27d4a", + 
"test-test_L1.fastp.json:md5,d9107ec414e44408d0698b167cc445f9", + "test.md.mosdepth.global.dist.txt:md5,40bf266b2080717f92b405cb42fab4a7", + "test.md.mosdepth.region.dist.txt:md5,94b080042f51f484fa178339cc9324bc", + "test.md.mosdepth.summary.txt:md5,7958c5422bb4b5181afe9782e38a735a", + "test.md.regions.bed.gz:md5,fe11b3aedde07e4e17114b5b37cdb3b8", + "test.md.regions.bed.gz.csi:md5,0dc011f3344841dc14aa488da905e917", + "test.recal.mosdepth.global.dist.txt:md5,40bf266b2080717f92b405cb42fab4a7", + "test.recal.mosdepth.region.dist.txt:md5,94b080042f51f484fa178339cc9324bc", + "test.recal.mosdepth.summary.txt:md5,7958c5422bb4b5181afe9782e38a735a", + "test.recal.regions.bed.gz:md5,fe11b3aedde07e4e17114b5b37cdb3b8", + "test.recal.regions.bed.gz.csi:md5,0dc011f3344841dc14aa488da905e917" + ], + "No BAM files", + [ + "test.md.cram:md5,dc339a125287041fa36a4cdda432ec8c", + "test.recal.cram:md5,dc339a125287041fa36a4cdda432ec8c" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_umi_cleanname_1.fastq.gz): Cannot extract flowcell ID from @922332" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T21:51:24.395033366" + } +} diff --git a/tests/umi_fgbio.nf.test b/tests/umi_fgbio.nf.test new file mode 100644 index 0000000000..c91b890de3 --- /dev/null +++ b/tests/umi_fgbio.nf.test @@ -0,0 +1,58 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --input tests/csv/3.0/fastq_umi.csv --umi_read_structure '+T 7M1S+T' --tools null", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_umi.csv", + tools: null, + umi_read_structure: '+T 7M1S+T' + ] + ], + [ + name: "-profile test --input tests/csv/3.0/fastq_umi.csv --umi_read_structure '12+T S' --tools null", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_umi.csv", + tools: null, + 
umi_read_structure: '12+T S' + ], + failure: true, + snapshot: 'stdout', + snapshot_include: 'input UMI Read structure' + ], + [ + name: "-profile test --input tests/csv/3.0/fastq_umi.csv --umi_read_structure '+T 7M1S+T' --tools null --aligner sentieon-bwamem", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_umi.csv", + tools: null, + umi_read_structure: '+T 7M1S+T', + aligner: 'sentieon-bwamem' + ], + failure: true, + snapshot: 'stdout', + snapshot_include: 'currently not compatible' + ], + [ + name: "-profile test --input tests/csv/3.0/fastq_umi.csv --umi_read_structure '+T 7M1S+T' --tools null --aligner parabricks", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_umi.csv", + tools: null, + umi_read_structure: '+T 7M1S+T', + aligner: 'parabricks' + ], + failure: true, + snapshot: 'stdout', + snapshot_include: 'currently not compatible' + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/umi_fgbio.nf.test.snap b/tests/umi_fgbio.nf.test.snap new file mode 100644 index 0000000000..754d00cfc1 --- /dev/null +++ b/tests/umi_fgbio.nf.test.snap @@ -0,0 +1,369 @@ +{ + "-profile test --input tests/csv/3.0/fastq_umi.csv --umi_read_structure '+T 7M1S+T' --tools null --aligner parabricks": { + "content": [ + [ + "pipeline_info" + ], + "No stable content", + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings", + [ + "parabricks is currently not compatible with FGBio UMI handling. Please choose a different aligner." 
+ ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-16T11:40:06.112688481" + }, + "-profile test --input tests/csv/3.0/fastq_umi.csv --umi_read_structure '+T 7M1S+T' --tools null": { + "content": [ + 29, + { + "BAM2FASTQ": { + "samtools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CALLUMICONSENSUS": { + "fgbio": "2.4.0" + }, + "CAT_FASTQ": { + "cat": 9.5 + }, + "COLLATE_FASTQ_MAP": { + "samtools": 1.21 + }, + "COLLATE_FASTQ_UNMAP": { + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FASTQTOBAM": { + "fgbio": "2.4.0" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "GROUPREADSBYUMI": { + "fgbio": "2.4.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_MERGE_UNMAP": { + "samtools": 1.21 + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_MAP_MAP": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_MAP_UNMAP": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_UNMAP_MAP": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_UNMAP_UNMAP": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_overrepresented_sequences_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + 
"multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + 
"multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_overrepresented_sequences_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_top_overrepresented_sequences_table.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + 
"multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_overrepresented_sequences_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/fastqc_top_overrepresented_sequences_table.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_overrepresented_sequences_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + 
"multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_top_overrepresented_sequences_table.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + 
"preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "preprocessing/umi", + "preprocessing/umi/test", + "preprocessing/umi/test/test_umi-consensus.bam", + "reference", + "reports", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/umi", + "reports/umi/test_umi-grouped_histogram.txt" + ], + [ + "fastqc-status-check-heatmap.txt:md5,92fcedba625d8394492d1ef6e2eb64f0", + "fastqc_adapter_content_plot.txt:md5,2a569bfd358aced71c1abf9f6c98e077", + "fastqc_overrepresented_sequences_plot.txt:md5,df45a06e934f25d5c81b8de3af859955", + "fastqc_per_base_n_content_plot.txt:md5,9beb9c220bb0314bd5365912584958c5", + "fastqc_per_base_sequence_quality_plot.txt:md5,69ca0488c8687d38e5db551c49fd9dce", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,823b5a69dfbae061cd2eabba777cdbbf", + 
"fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,c01f53f345f1ed4e621f2ad5309786c6", + "fastqc_per_sequence_quality_scores_plot.txt:md5,74de77d7e26e0a7d84ffcadcc82958d5", + "fastqc_sequence_counts_plot.txt:md5,1a4aab87f1bd4fd9f69cabef40f5e5ed", + "fastqc_sequence_duplication_levels_plot.txt:md5,72e6366160d000a2990b2bbd321300a7", + "fastqc_sequence_length_distribution_plot.txt:md5,7c07faf0c0b90613cdc82a9f09bc1f19", + "mosdepth-coverage-per-contig-single.txt:md5,800ad271050c46d24bf53c0f8bdf7b6e", + "mosdepth-cumcoverage-dist-id.txt:md5,3c526ec6da433233b419bdbdd19a1b56", + "mosdepth_perchrom.txt:md5,800ad271050c46d24bf53c0f8bdf7b6e", + "multiqc_citations.txt:md5,7d0b4b866fa577272c48a1f3ad72e75d", + "multiqc_fastqc.txt:md5,0ca2cba4204d9076a1eb17596379d10c", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,4d27768d75a69a61daf23bf5efe17687", + "samtools_alignment_plot.txt:md5,3c2a882852a7ffe06610c9027f0a312a", + "test.md.mosdepth.global.dist.txt:md5,09d22913aa50a0207f97a3f85b182c6e", + "test.md.mosdepth.region.dist.txt:md5,61676a4a3668c0e84eb0f56dc6bda1ae", + "test.md.mosdepth.summary.txt:md5,9bbea5e4d213a51f501c2aadff8d4526", + "test.md.regions.bed.gz:md5,e24c18ad56e54376c41fdaacec372f9e", + "test.md.regions.bed.gz.csi:md5,d0713716f63ac573f4a3385733e9a537", + "test.recal.mosdepth.global.dist.txt:md5,09d22913aa50a0207f97a3f85b182c6e", + "test.recal.mosdepth.region.dist.txt:md5,61676a4a3668c0e84eb0f56dc6bda1ae", + "test.recal.mosdepth.summary.txt:md5,9bbea5e4d213a51f501c2aadff8d4526", + "test.recal.regions.bed.gz:md5,e24c18ad56e54376c41fdaacec372f9e", + "test.recal.regions.bed.gz.csi:md5,d0713716f63ac573f4a3385733e9a537", + 
"test_umi-grouped_histogram.txt:md5,85292e9acb83edf17110dce17be27f44" + ], + [ + "test_umi-consensus.bam:md5,18d420720f72366289d3a324f4552522" + ], + [ + "test.md.cram:md5,2d0c176fed158d84a061b69e1947994c", + "test.recal.cram:md5,6927569ca52dbba512d27f8742bc6aae" + ], + "No VCF files", + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_umi_cleanname_1.fastq.gz): Cannot extract flowcell ID from @922332" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T21:53:07.910593016" + }, + "-profile test --input tests/csv/3.0/fastq_umi.csv --umi_read_structure '+T 7M1S+T' --tools null --aligner sentieon-bwamem": { + "content": [ + [ + "pipeline_info" + ], + "No stable content", + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings", + [ + "sentieon-bwamem is currently not compatible with FGBio UMI handling. Please choose a different aligner." + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-16T11:39:15.062875515" + }, + "-profile test --input tests/csv/3.0/fastq_umi.csv --umi_read_structure '12+T S' --tools null": { + "content": [ + [ + "pipeline_info" + ], + "No stable content", + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings", + [ + "Please check the input UMI Read structure`12+T S` invalid" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-16T11:45:20.994546242" + } +} diff --git a/tests/umi_in_read_names.nf.test b/tests/umi_in_read_names.nf.test new file mode 100644 index 0000000000..ec32e0602b --- /dev/null +++ b/tests/umi_in_read_names.nf.test @@ -0,0 +1,31 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --input tests/csv/3.0/bam_umi_header.csv --umi_in_read_header --tools markduplicates --step mapping", + params: [ + 
input: "${projectDir}/tests/csv/3.0/bam_umi_header.csv", + umi_in_read_header: true, + step: "mapping" + ] + ], + [ + name: "-profile test --input tests/csv/3.0/bam_umi_header.csv --umi_in_read_header --tools markduplicates --step markduplicates", + params: [ + input: "${projectDir}/tests/csv/3.0/bam_umi_header.csv", + umi_in_read_header: true, + step: "markduplicates" + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/umi_in_read_names.nf.test.snap b/tests/umi_in_read_names.nf.test.snap new file mode 100644 index 0000000000..32e8332cdf --- /dev/null +++ b/tests/umi_in_read_names.nf.test.snap @@ -0,0 +1,618 @@ +{ + "-profile test --input tests/csv/3.0/bam_umi_header.csv --umi_in_read_header --tools markduplicates --step markduplicates": { + "content": [ + 19, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FGBIO_COPYUMIFROMREADNAME": { + "fgbio": "2.4.0" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + 
"multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + 
"multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + 
"multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + 
"multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/strelka", + "reports/bcftools/strelka/test", + "reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + 
"reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/test", + "reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.count", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/test", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,52b3e9cece0b83af190f717512c920c1", + "mosdepth-cumcoverage-dist-id.txt:md5,7faeeef70cfa29a85ff2d6682db7de2b", + "mosdepth_perchrom.txt:md5,52b3e9cece0b83af190f717512c920c1", + "multiqc_citations.txt:md5,ace4ca89138a5f1e2be289c157c00bd9", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,175b0442368f03294ef9ce9e73172c7b", + "samtools_alignment_plot.txt:md5,6555bbf8c1ef12058e9a050545a9fa41", + "test.strelka.variants.bcftools_stats.txt:md5,5b7cff7b17c5bcde6177a188b1511986", + "test.md.mosdepth.global.dist.txt:md5,0c795eadf5e5a8ea8469ba8c0ece9bdd", + "test.md.mosdepth.region.dist.txt:md5,54273a37adf55f8cee822aaf92a4c321", + "test.md.mosdepth.summary.txt:md5,bb12489d3d44c4e44eb5dcc8fdef636c", + "test.md.regions.bed.gz:md5,24cd07f8ce342a277f2954d6e731015e", + 
"test.md.regions.bed.gz.csi:md5,db6f77bcdbb7ada7e0c93e5b987b8265", + "test.recal.mosdepth.global.dist.txt:md5,0c795eadf5e5a8ea8469ba8c0ece9bdd", + "test.recal.mosdepth.region.dist.txt:md5,54273a37adf55f8cee822aaf92a4c321", + "test.recal.mosdepth.summary.txt:md5,bb12489d3d44c4e44eb5dcc8fdef636c", + "test.recal.regions.bed.gz:md5,24cd07f8ce342a277f2954d6e731015e", + "test.recal.regions.bed.gz.csi:md5,db6f77bcdbb7ada7e0c93e5b987b8265", + "test.strelka.variants.FILTER.summary:md5,f58effcb4006be9b10081e4417197aa6", + "test.strelka.variants.TsTv.count:md5,fccdacf16ebe20d74f36bd7b38f11ca4" + ], + "No BAM files", + [ + "test.md.cram:md5,1dd9ef98ff12ff54d1017fb73a6e11a4", + "test.recal.cram:md5,1dd9ef98ff12ff54d1017fb73a6e11a4" + ], + [ + "test.strelka.genome.vcf.gz:md5,c575940c23d416fc423c2f453f443e61", + "test.strelka.variants.vcf.gz:md5,20a32949b91e4986cb39355732cbd44d" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T09:52:02.012960987" + }, + "-profile test --input tests/csv/3.0/bam_umi_header.csv --umi_in_read_header --tools markduplicates --step mapping": { + "content": [ + 30, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CAT_FASTQ": { + "cat": 9.5 + }, + "COLLATE_FASTQ_MAP": { + "samtools": 1.21 + }, + "COLLATE_FASTQ_UNMAP": { + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FGBIO_COPYUMIFROMREADNAME": { + "fgbio": "2.4.0" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_MERGE_UNMAP": { + "samtools": 1.21 + }, + 
"SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_MAP_MAP": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_MAP_UNMAP": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_UNMAP_MAP": { + "samtools": 1.21 + }, + "SAMTOOLS_VIEW_UNMAP_UNMAP": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_overrepresented_sequences_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + 
"multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_overrepresented_sequences_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_top_overrepresented_sequences_table.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + 
"multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_overrepresented_sequences_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/fastqc_top_overrepresented_sequences_table.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + 
"multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_overrepresented_sequences_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + 
"multiqc/multiqc_plots/svg/fastqc_top_overrepresented_sequences_table.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/strelka", + "reports/bcftools/strelka/test", + "reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt", + "reports/fastqc", + "reports/fastqc/test-1", + "reports/fastqc/test-1/test-1_1_fastqc.html", + "reports/fastqc/test-1/test-1_1_fastqc.zip", + "reports/fastqc/test-1/test-1_2_fastqc.html", + 
"reports/fastqc/test-1/test-1_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/test", + "reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.count", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/test", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi" + ], + [ + "fastqc-status-check-heatmap.txt:md5,360c48a151504d816500299220311e35", + "fastqc_adapter_content_plot.txt:md5,b9f10f9b81f8f5fe094f5414bd2b4ec5", + "fastqc_overrepresented_sequences_plot.txt:md5,b708b395ebd0771517854b354a6cea66", + "fastqc_per_base_n_content_plot.txt:md5,49080e84997e76dcf2f053e7de863a26", + "fastqc_per_base_sequence_quality_plot.txt:md5,94b27da4812230421c3d6792144fc1c1", + 
"fastqc_per_sequence_gc_content_plot_Counts.txt:md5,3f18d62eaea5e919a8df0f6bedfe6bc9", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,02a7081512be7f6ffbcc10bf63be4ba0", + "fastqc_per_sequence_quality_scores_plot.txt:md5,2a54f52d931b8165bbd5226924810b72", + "fastqc_sequence_counts_plot.txt:md5,d5f0a3b22695c49d652d0ed463a08062", + "fastqc_sequence_duplication_levels_plot.txt:md5,57cbfa4d81b8cde028c94657db09cb0b", + "fastqc_sequence_length_distribution_plot.txt:md5,00ebb6632eabcb53b49b9360c87ad297", + "mosdepth-coverage-per-contig-single.txt:md5,e2db64a56c2ffea1e9741e1919d656c3", + "mosdepth-cumcoverage-dist-id.txt:md5,c6f6f5a148679687fbf8e8a480ef4c0d", + "mosdepth_perchrom.txt:md5,e2db64a56c2ffea1e9741e1919d656c3", + "multiqc_citations.txt:md5,ace4ca89138a5f1e2be289c157c00bd9", + "multiqc_fastqc.txt:md5,96c4b1777e0faee9b1deeb15393b3ba3", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,afe461017b5f2189651f29c79f6fee44", + "samtools_alignment_plot.txt:md5,9d6e0d529e64404f2c24e74285e242ee", + "test.strelka.variants.bcftools_stats.txt:md5,3f4c40f17e956e4140a0432072a9dde0", + "test.md.mosdepth.global.dist.txt:md5,7b8a7773d5d854ae1b681b971d11bb34", + "test.md.mosdepth.region.dist.txt:md5,1dd626f6fef0d1ac29fd7ffb8b1bbd95", + "test.md.mosdepth.summary.txt:md5,25af78b074abf710ddd9c90cba1928bc", + "test.md.regions.bed.gz:md5,813d852c5a237022fd4b7446d12f7772", + "test.md.regions.bed.gz.csi:md5,10b7f8e9ac2ac27205f8bc827a186195", + "test.recal.mosdepth.global.dist.txt:md5,7b8a7773d5d854ae1b681b971d11bb34", + "test.recal.mosdepth.region.dist.txt:md5,1dd626f6fef0d1ac29fd7ffb8b1bbd95", + "test.recal.mosdepth.summary.txt:md5,25af78b074abf710ddd9c90cba1928bc", 
+ "test.recal.regions.bed.gz:md5,813d852c5a237022fd4b7446d12f7772", + "test.recal.regions.bed.gz.csi:md5,10b7f8e9ac2ac27205f8bc827a186195", + "test.strelka.variants.FILTER.summary:md5,869e416e19c9d8c7c6924758e66ea426", + "test.strelka.variants.TsTv.count:md5,2a049bc323b4bc559da89d136a567e28" + ], + "No BAM files", + [ + "test.md.cram:md5,93ff54aed62b2509c88611347f884c97", + "test.recal.cram:md5,93ff54aed62b2509c88611347f884c97" + ], + [ + "test.strelka.genome.vcf.gz:md5,cc090b782daf68775321dc9d10643be", + "test.strelka.variants.vcf.gz:md5,92a9aad92697bf9e22dab35aaad68ffa" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T22:06:00.327735562" + } +} diff --git a/tests/variant_calling_all.nf.test b/tests/variant_calling_all.nf.test new file mode 100644 index 0000000000..b363aae91d --- /dev/null +++ b/tests/variant_calling_all.nf.test @@ -0,0 +1,43 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --input tests/csv/3.0/fastq_tumor_only.csv --tools cnvkit,freebayes,mpileup,mutect2,tiddit", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_tumor_only.csv", + tools: "cnvkit,freebayes,mpileup,mutect2,tiddit" + ], + ignoreFiles: '**/*.freebayes.vcf.gz' + ], + [ + name: "-profile test --input tests/csv/3.0/fastq_pair.csv --tools cnvkit,deepvariant,freebayes,mpileup,msisensorpro,strelka,tiddit", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_pair.csv", + tools: "cnvkit,deepvariant,freebayes,mpileup,msisensorpro,strelka,tiddit" + ], + include_freebayes_unfiltered: true, + ignoreFiles: '**/*.freebayes{.filtered,}.vcf{,.gz}', + no_conda: true + ], + [ + name: "-profile test --input tests/csv/3.0/fastq_single.csv --tools cnvkit,deepvariant,freebayes,mpileup,strelka,tiddit", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_single.csv", + tools: 
"cnvkit,deepvariant,freebayes,mpileup,strelka,tiddit" + ], + include_freebayes_unfiltered: true, + ignoreFiles: '**/*.freebayes.vcf{,.gz}', + no_conda: true + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_all.nf.test.snap b/tests/variant_calling_all.nf.test.snap new file mode 100644 index 0000000000..ddeecf3ca7 --- /dev/null +++ b/tests/variant_calling_all.nf.test.snap @@ -0,0 +1,1604 @@ +{ + "-profile test --input tests/csv/3.0/fastq_pair.csv --tools cnvkit,deepvariant,freebayes,mpileup,msisensorpro,strelka,tiddit": { + "content": [ + 97, + { + "BCFTOOLS_MPILEUP": { + "bcftools": 1.21 + }, + "BCFTOOLS_SORT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CNVKIT_ANTITARGET": { + "cnvkit": "0.9.11" + }, + "CNVKIT_BATCH": { + "cnvkit": "0.9.10", + "samtools": 1.17 + }, + "CNVKIT_CALL": { + "cnvkit": "0.9.10" + }, + "CNVKIT_EXPORT": { + "cnvkit": "0.9.10" + }, + "CNVKIT_GENEMETRICS": { + "cnvkit": "0.9.10" + }, + "CNVKIT_REFERENCE": { + "cnvkit": "0.9.11" + }, + "CRAM_TO_BAM": { + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "DEEPVARIANT_RUNDEEPVARIANT": { + "deepvariant": "1.9.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FREEBAYES": { + "freebayes": "1.3.10" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "MSISENSORPRO_MSISOMATIC": { + "msisensor-pro": "1.3.0" + }, + "MSISENSORPRO_SCAN": { + "msisensor-pro": "1.3.0" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + 
"STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "STRELKA_SOMATIC": { + "strelka": "2.9.10" + }, + "SVDB_MERGE": { + "bcftools": 1.21, + "svdb": "2.8.2" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIP_TIDDIT_SV": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_VC_FREEBAYES": { + "tabix": 1.21 + }, + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, + "TIDDIT_SV": { + "tiddit": "3.6.1" + }, + "VCFLIB_VCFFILTER": { + "vcflib": "1.0.14" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + 
"multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + 
"multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + 
"multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", 
+ "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/converted", + "preprocessing/converted/cram_to_bam", + "preprocessing/converted/cram_to_bam/test", + "preprocessing/converted/cram_to_bam/test/test.bam", + "preprocessing/converted/cram_to_bam/test/test.bam.bai", + "preprocessing/converted/cram_to_bam/test2", + "preprocessing/converted/cram_to_bam/test2/test2.bam", + "preprocessing/converted/cram_to_bam/test2/test2.bam.bai", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/markduplicates/test2", + "preprocessing/markduplicates/test2/test2.md.cram", + "preprocessing/markduplicates/test2/test2.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recal_table/test2", + "preprocessing/recal_table/test2/test2.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + 
"preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "preprocessing/recalibrated/test2", + "preprocessing/recalibrated/test2/test2.recal.cram", + "preprocessing/recalibrated/test2/test2.recal.cram.crai", + "reference", + "reference/msisensorpro", + "reports", + "reports/bcftools", + "reports/bcftools/bcftools", + "reports/bcftools/bcftools/test", + "reports/bcftools/bcftools/test/test.bcftools.bcftools_stats.txt", + "reports/bcftools/deepvariant", + "reports/bcftools/deepvariant/test", + "reports/bcftools/deepvariant/test/test.deepvariant.bcftools_stats.txt", + "reports/bcftools/freebayes", + "reports/bcftools/freebayes/test", + "reports/bcftools/freebayes/test/test.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/freebayes/test2_vs_test", + "reports/bcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/strelka", + "reports/bcftools/strelka/test", + "reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt", + "reports/bcftools/strelka/test2_vs_test", + "reports/bcftools/strelka/test2_vs_test/test2_vs_test.strelka.somatic_indels.bcftools_stats.txt", + "reports/bcftools/strelka/test2_vs_test/test2_vs_test.strelka.somatic_snvs.bcftools_stats.txt", + "reports/bcftools/tiddit", + "reports/bcftools/tiddit/test", + "reports/bcftools/tiddit/test/test.tiddit.bcftools_stats.txt", + "reports/bcftools/tiddit/test2_vs_test", + "reports/bcftools/tiddit/test2_vs_test/test2_vs_test.tiddit_sv_merge.bcftools_stats.txt", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test2-test_L1", + "reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.html", + 
"reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.zip", + "reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.html", + "reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/markduplicates/test2", + "reports/markduplicates/test2/test2.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/mosdepth/test2", + "reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt", + "reports/mosdepth/test2/test2.md.mosdepth.region.dist.txt", + "reports/mosdepth/test2/test2.md.mosdepth.summary.txt", + "reports/mosdepth/test2/test2.md.regions.bed.gz", + "reports/mosdepth/test2/test2.md.regions.bed.gz.csi", + "reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test2/test2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test2/test2.recal.mosdepth.summary.txt", + "reports/mosdepth/test2/test2.recal.regions.bed.gz", + "reports/mosdepth/test2/test2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/samtools/test2", + "reports/samtools/test2/test2.md.cram.stats", + "reports/samtools/test2/test2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/bcftools", + "reports/vcftools/bcftools/test", + 
"reports/vcftools/bcftools/test/test.bcftools.FILTER.summary", + "reports/vcftools/bcftools/test/test.bcftools.TsTv.count", + "reports/vcftools/bcftools/test/test.bcftools.TsTv.qual", + "reports/vcftools/deepvariant", + "reports/vcftools/deepvariant/test", + "reports/vcftools/deepvariant/test/test.deepvariant.FILTER.summary", + "reports/vcftools/deepvariant/test/test.deepvariant.TsTv.count", + "reports/vcftools/deepvariant/test/test.deepvariant.TsTv.qual", + "reports/vcftools/freebayes", + "reports/vcftools/freebayes/test", + "reports/vcftools/freebayes/test/test.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.qual", + "reports/vcftools/freebayes/test2_vs_test", + "reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.TsTv.qual", + "reports/vcftools/strelka", + "reports/vcftools/strelka/test", + "reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.count", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/test2_vs_test", + "reports/vcftools/strelka/test2_vs_test/test2_vs_test.strelka.somatic_indels.FILTER.summary", + "reports/vcftools/strelka/test2_vs_test/test2_vs_test.strelka.somatic_indels.TsTv.count", + "reports/vcftools/strelka/test2_vs_test/test2_vs_test.strelka.somatic_indels.TsTv.qual", + "reports/vcftools/strelka/test2_vs_test/test2_vs_test.strelka.somatic_snvs.FILTER.summary", + "reports/vcftools/strelka/test2_vs_test/test2_vs_test.strelka.somatic_snvs.TsTv.count", + "reports/vcftools/strelka/test2_vs_test/test2_vs_test.strelka.somatic_snvs.TsTv.qual", + "reports/vcftools/tiddit", + "reports/vcftools/tiddit/test", 
+ "reports/vcftools/tiddit/test/test.tiddit.FILTER.summary", + "reports/vcftools/tiddit/test/test.tiddit.TsTv.count", + "reports/vcftools/tiddit/test/test.tiddit.TsTv.qual", + "reports/vcftools/tiddit/test2_vs_test", + "reports/vcftools/tiddit/test2_vs_test/test2_vs_test.tiddit_sv_merge.FILTER.summary", + "reports/vcftools/tiddit/test2_vs_test/test2_vs_test.tiddit_sv_merge.TsTv.count", + "reports/vcftools/tiddit/test2_vs_test/test2_vs_test.tiddit_sv_merge.TsTv.qual", + "variant_calling", + "variant_calling/bcftools", + "variant_calling/bcftools/test", + "variant_calling/bcftools/test/test.bcftools.vcf.gz", + "variant_calling/bcftools/test/test.bcftools.vcf.gz.tbi", + "variant_calling/cnvkit", + "variant_calling/cnvkit/test", + "variant_calling/cnvkit/test/genome.antitarget.bed", + "variant_calling/cnvkit/test/genome.target.bed", + "variant_calling/cnvkit/test/reference.cnn", + "variant_calling/cnvkit/test/test-diagram.pdf", + "variant_calling/cnvkit/test/test-scatter.png", + "variant_calling/cnvkit/test/test.antitargetcoverage.cnn", + "variant_calling/cnvkit/test/test.bintest.cns", + "variant_calling/cnvkit/test/test.call.cns", + "variant_calling/cnvkit/test/test.cnr", + "variant_calling/cnvkit/test/test.cns", + "variant_calling/cnvkit/test/test.cnvcall.vcf", + "variant_calling/cnvkit/test/test.genemetrics.tsv", + "variant_calling/cnvkit/test/test.germline.call.cns", + "variant_calling/cnvkit/test/test.targetcoverage.cnn", + "variant_calling/cnvkit/test2_vs_test", + "variant_calling/cnvkit/test2_vs_test/genome.antitarget.bed", + "variant_calling/cnvkit/test2_vs_test/genome.target.bed", + "variant_calling/cnvkit/test2_vs_test/reference.cnn", + "variant_calling/cnvkit/test2_vs_test/test.antitargetcoverage.cnn", + "variant_calling/cnvkit/test2_vs_test/test.targetcoverage.cnn", + "variant_calling/cnvkit/test2_vs_test/test2-diagram.pdf", + "variant_calling/cnvkit/test2_vs_test/test2-scatter.png", + "variant_calling/cnvkit/test2_vs_test/test2.antitargetcoverage.cnn", + 
"variant_calling/cnvkit/test2_vs_test/test2.bintest.cns", + "variant_calling/cnvkit/test2_vs_test/test2.call.cns", + "variant_calling/cnvkit/test2_vs_test/test2.cnr", + "variant_calling/cnvkit/test2_vs_test/test2.cns", + "variant_calling/cnvkit/test2_vs_test/test2.genemetrics.tsv", + "variant_calling/cnvkit/test2_vs_test/test2.somatic.call.cns", + "variant_calling/cnvkit/test2_vs_test/test2.targetcoverage.cnn", + "variant_calling/cnvkit/test2_vs_test/test2_vs_test.cnvcall.vcf", + "variant_calling/deepvariant", + "variant_calling/deepvariant/test", + "variant_calling/deepvariant/test/test.deepvariant.g.vcf.gz", + "variant_calling/deepvariant/test/test.deepvariant.g.vcf.gz.tbi", + "variant_calling/deepvariant/test/test.deepvariant.vcf.gz", + "variant_calling/deepvariant/test/test.deepvariant.vcf.gz.tbi", + "variant_calling/freebayes", + "variant_calling/freebayes/test", + "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/test/test.freebayes.vcf.gz", + "variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi", + "variant_calling/freebayes/test2_vs_test", + "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz", + "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz.tbi", + "variant_calling/msisensorpro", + "variant_calling/msisensorpro/test2_vs_test", + "variant_calling/msisensorpro/test2_vs_test/test2_vs_test", + "variant_calling/msisensorpro/test2_vs_test/test2_vs_test_dis", + "variant_calling/strelka", + "variant_calling/strelka/test", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz", + 
"variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi", + "variant_calling/strelka/test2_vs_test", + "variant_calling/strelka/test2_vs_test/test2_vs_test.strelka.somatic_indels.vcf.gz", + "variant_calling/strelka/test2_vs_test/test2_vs_test.strelka.somatic_indels.vcf.gz.tbi", + "variant_calling/strelka/test2_vs_test/test2_vs_test.strelka.somatic_snvs.vcf.gz", + "variant_calling/strelka/test2_vs_test/test2_vs_test.strelka.somatic_snvs.vcf.gz.tbi", + "variant_calling/tiddit", + "variant_calling/tiddit/test", + "variant_calling/tiddit/test/test.tiddit.ploidies.tab", + "variant_calling/tiddit/test/test.tiddit.vcf.gz", + "variant_calling/tiddit/test/test.tiddit.vcf.gz.tbi", + "variant_calling/tiddit/test2_vs_test", + "variant_calling/tiddit/test2_vs_test/test2_vs_test.tiddit.normal.vcf.gz", + "variant_calling/tiddit/test2_vs_test/test2_vs_test.tiddit.normal.vcf.gz.tbi", + "variant_calling/tiddit/test2_vs_test/test2_vs_test.tiddit.ploidies.tab", + "variant_calling/tiddit/test2_vs_test/test2_vs_test.tiddit.tumor.vcf.gz", + "variant_calling/tiddit/test2_vs_test/test2_vs_test.tiddit.tumor.vcf.gz.tbi", + "variant_calling/tiddit/test2_vs_test/test2_vs_test.tiddit_sv_merge.vcf.gz" + ], + [ + "fastqc-status-check-heatmap.txt:md5,eeb4e7e7a45f4223c86bfe3aea81f90b", + "fastqc_adapter_content_plot.txt:md5,cc7a809f9f001c10646ee4199ccdb40f", + "fastqc_per_base_n_content_plot.txt:md5,1eba855ae0fa5b5ed4a1f90d1c97f759", + "fastqc_per_base_sequence_quality_plot.txt:md5,cbb2743dfb2ec74e72b578c83ec28ee8", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,73c884822eba0bafcdf34b90fe81aec5", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,24eeb00e5e2b11c7ab90a3223d429d15", + "fastqc_per_sequence_quality_scores_plot.txt:md5,6f048594f02effb93608665be29bd35a", + "fastqc_sequence_counts_plot.txt:md5,fca7ee9ef3382e2837a302d8c5d33769", + "fastqc_sequence_duplication_levels_plot.txt:md5,2aa0c6f33e4cffbb29cdabe2c28bb097", + 
"fastqc_sequence_length_distribution_plot.txt:md5,61b1fe978a2c73b86c30c27ee4bc60ae", + "mosdepth-coverage-per-contig-single.txt:md5,f73b2f18633bb4b386b4f9e9fcdffb6a", + "mosdepth-cumcoverage-dist-id.txt:md5,954642b30618e908300aa9ac1673985e", + "mosdepth_perchrom.txt:md5,f73b2f18633bb4b386b4f9e9fcdffb6a", + "multiqc_citations.txt:md5,ace4ca89138a5f1e2be289c157c00bd9", + "multiqc_fastqc.txt:md5,2fd25e8c81f962594b801d5a9df3cd87", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,13dab249f7bef935dfef234299550db8", + "samtools_alignment_plot.txt:md5,44149e0c5cc4bfa58242824b300219a2", + "test.bcftools.bcftools_stats.txt:md5,4b2997064fb500f090639edca1ccaa6e", + "test.deepvariant.bcftools_stats.txt:md5,61ab450ed778088b9c879b8dcbbb9db8", + "test.freebayes.filtered.bcftools_stats.txt:md5,dde124ceaf6f109cd274b837b950096b", + "test2_vs_test.freebayes.filtered.bcftools_stats.txt:md5,6f942caec2f4f02c69e1216226fa44a9", + "test.strelka.variants.bcftools_stats.txt:md5,2613827870dd789fe602a8a3b739b7f2", + "test2_vs_test.strelka.somatic_indels.bcftools_stats.txt:md5,5e8f9a8fdbc765ced736d0c8c7dd3a52", + "test2_vs_test.strelka.somatic_snvs.bcftools_stats.txt:md5,edb7763fad7b6f825e47e01ffa70adbc", + "test.tiddit.bcftools_stats.txt:md5,2b96446a75e1ec5d5f1310a934c0dd4c", + "test2_vs_test.tiddit_sv_merge.bcftools_stats.txt:md5,a6c45b2a2b7d8844e5536deee5c77af5", + "test.md.mosdepth.global.dist.txt:md5,76fa71922a3f748e507c2364c531dfcb", + "test.md.mosdepth.region.dist.txt:md5,abc5df85e302b79985627888870882da", + "test.md.mosdepth.summary.txt:md5,d536456436eb275159b8c6af83213d80", + "test.md.regions.bed.gz:md5,b25a2798061021c0b2f4e1d18219bbbd", + 
"test.md.regions.bed.gz.csi:md5,b1c2a861f64e20a94108a6de3b76c582", + "test.recal.mosdepth.global.dist.txt:md5,76fa71922a3f748e507c2364c531dfcb", + "test.recal.mosdepth.region.dist.txt:md5,abc5df85e302b79985627888870882da", + "test.recal.mosdepth.summary.txt:md5,d536456436eb275159b8c6af83213d80", + "test.recal.regions.bed.gz:md5,b25a2798061021c0b2f4e1d18219bbbd", + "test.recal.regions.bed.gz.csi:md5,b1c2a861f64e20a94108a6de3b76c582", + "test2.md.mosdepth.global.dist.txt:md5,2020cf6dfc7ddca020c921dd9f0549b7", + "test2.md.mosdepth.region.dist.txt:md5,38ff8b38c33b9231f047fea8ea830aae", + "test2.md.mosdepth.summary.txt:md5,8b991358768cade225470a07cd34f573", + "test2.md.regions.bed.gz:md5,08e767f91a0a8d82733f0040e804a85f", + "test2.md.regions.bed.gz.csi:md5,d5f1c9389ecf52ba839e834780a94549", + "test2.recal.mosdepth.global.dist.txt:md5,2020cf6dfc7ddca020c921dd9f0549b7", + "test2.recal.mosdepth.region.dist.txt:md5,38ff8b38c33b9231f047fea8ea830aae", + "test2.recal.mosdepth.summary.txt:md5,8b991358768cade225470a07cd34f573", + "test2.recal.regions.bed.gz:md5,08e767f91a0a8d82733f0040e804a85f", + "test2.recal.regions.bed.gz.csi:md5,d5f1c9389ecf52ba839e834780a94549", + "test.bcftools.FILTER.summary:md5,2c7c195b18f335698bae9619280492e8", + "test.bcftools.TsTv.count:md5,9748f609e7b83855e75804191c935ab3", + "test.deepvariant.FILTER.summary:md5,4908d916a23384a3b253d17b2ae784d7", + "test.deepvariant.TsTv.count:md5,d3c138521fd95eb45f6e495f39d47404", + "test.freebayes.filtered.FILTER.summary:md5,87e753ba2ad969475fb55661852f75e0", + "test.freebayes.filtered.TsTv.count:md5,845f64e5bb4224af98f3a47294cd5483", + "test2_vs_test.freebayes.filtered.FILTER.summary:md5,126e83dcd37b82420f7c5d7b235479f1", + "test2_vs_test.freebayes.filtered.TsTv.count:md5,28919c7d29c998681391d2027af3e0f9", + "test.strelka.variants.FILTER.summary:md5,dd87f507da7de20d5318841af312493b", + "test.strelka.variants.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + 
"test2_vs_test.strelka.somatic_indels.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "test2_vs_test.strelka.somatic_indels.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f", + "test2_vs_test.strelka.somatic_snvs.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "test2_vs_test.strelka.somatic_snvs.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f", + "test.tiddit.FILTER.summary:md5,2cb5598e2a83870e162787c5025c9518", + "test.tiddit.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + "test2_vs_test.tiddit_sv_merge.FILTER.summary:md5,2cb5598e2a83870e162787c5025c9518", + "test2_vs_test.tiddit_sv_merge.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f", + "genome.antitarget.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.target.bed:md5,ab3aafe8cc4cc3f1c40d527dfad64fda", + "reference.cnn:md5,7635ac4a5aff1502b21155f5e75dbe54", + "test.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test.bintest.cns:md5,87c17909f4ea3cc9b8bd225284094c3d", + "test.call.cns:md5,70d0d1fcb77739393e4e3a0406bdd34d", + "test.cnr:md5,429ff210a69fa82fb0fbac0e3e42b3c2", + "test.cns:md5,30e3d4ecd1ea2019c8bc195817dae61a", + "test.genemetrics.tsv:md5,5ec3555520f502f00f551ae7900a3824", + "test.germline.call.cns:md5,3f6f0a3dd3bd3fd0485e041b95b61145", + "test.targetcoverage.cnn:md5,0e0416a5dc409a9c311232092a058aec", + "genome.antitarget.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.target.bed:md5,d7fcdfe7dc055924fe7cc3fb990f5633", + "reference.cnn:md5,6542568cd3859ba3789c543c27d64b20", + "test.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test.targetcoverage.cnn:md5,bc82e785b5930b589c216516d1543985", + "test2.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test2.bintest.cns:md5,55dee1c582df1c4a2ab87103958e7ad4", + "test2.call.cns:md5,37e5745b8d3cd9a7212d02dfc234ee42", + "test2.cnr:md5,a98137a0a05fb23bab317caeaeb64db1", + "test2.cns:md5,bb571a25f48b4e80a4414d43d8e2c79c", + "test2.genemetrics.tsv:md5,5ec3555520f502f00f551ae7900a3824", 
+ "test2.somatic.call.cns:md5,deaaa5fc0ddc44c029aa12d87e0cc4e6", + "test2.targetcoverage.cnn:md5,c8029a3886a9e0372ca24a5bc96ce264", + "test2_vs_test:md5,8d4908fbe1f6e3bd581c925d72788c54", + "test2_vs_test_dis:md5,6c68450054d0dd7a0b17a8cf9ef3337e" + ], + [ + "test.bam:md5,654909615a48db30bdc14ec4d9d7d17c", + "test2.bam:md5,f4205ab086600ba2927e1468dc732976" + ], + [ + "test.md.cram:md5,59ecc5c82c7af1283eea7507c590c831", + "test2.md.cram:md5,bac87cf9290577fd9a4def63e046031f", + "test.recal.cram:md5,654909615a48db30bdc14ec4d9d7d17c", + "test2.recal.cram:md5,f4205ab086600ba2927e1468dc732976" + ], + [ + [ + "test.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=732, phased=false, phasedAutodetect=false]" + ], + [ + "test2_vs_test.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=1447, phased=false, phasedAutodetect=false]" + ] + ], + [ + "test.bcftools.vcf.gz:md5,f627b06200401f34e124810ec89fde7c", + "test.cnvcall.vcf:md5,fa78210d08001ce5cecb67d8d1f1c9a0", + "test2_vs_test.cnvcall.vcf:md5,b65c037ec53243a3038b6c2e9c4a1b4b", + "test.deepvariant.g.vcf.gz:md5,c29fb457bb32ab95580d45e20048fc25", + "test.deepvariant.vcf.gz:md5,e6f8bbd0bc7a977f3af578e347f2cce6", + "test.strelka.genome.vcf.gz:md5,16437a040679d88b7d84a9276f793d6c", + "test.strelka.variants.vcf.gz:md5,666f835fdaf4952a179cdedd40c9d565", + "test2_vs_test.strelka.somatic_indels.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "test2_vs_test.strelka.somatic_snvs.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tiddit.vcf.gz:md5,c74645590b3d3c8d5f34e1e681ccd05b", + "test2_vs_test.tiddit.normal.vcf.gz:md5,c74645590b3d3c8d5f34e1e681ccd05b", + "test2_vs_test.tiddit.tumor.vcf.gz:md5,a4a162ddf9a49df61c62abbf704e2d19", + "test2_vs_test.tiddit_sv_merge.vcf.gz:md5,9105fb1263deef2521b302b38d77cad7" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz): Cannot extract flowcell ID from 
@normal#21#998513#1/1", + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-08T16:27:17.419740053" + }, + "-profile test --input tests/csv/3.0/fastq_single.csv --tools cnvkit,deepvariant,freebayes,mpileup,strelka,tiddit": { + "content": [ + 55, + { + "BCFTOOLS_MPILEUP": { + "bcftools": 1.21 + }, + "BCFTOOLS_SORT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CNVKIT_ANTITARGET": { + "cnvkit": "0.9.11" + }, + "CNVKIT_BATCH": { + "cnvkit": "0.9.10", + "samtools": 1.17 + }, + "CNVKIT_CALL": { + "cnvkit": "0.9.10" + }, + "CNVKIT_EXPORT": { + "cnvkit": "0.9.10" + }, + "CNVKIT_GENEMETRICS": { + "cnvkit": "0.9.10" + }, + "CNVKIT_REFERENCE": { + "cnvkit": "0.9.11" + }, + "CRAM_TO_BAM": { + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "DEEPVARIANT_RUNDEEPVARIANT": { + "deepvariant": "1.9.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FREEBAYES": { + "freebayes": "1.3.10" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIP_TIDDIT_SV": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_VC_FREEBAYES": { + "tabix": 1.21 + }, + 
"TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, + "TIDDIT_SV": { + "tiddit": "3.6.1" + }, + "VCFLIB_VCFFILTER": { + "vcflib": "1.0.14" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + 
"multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + 
"multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", 
+ "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + 
"multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + 
"multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/converted", + "preprocessing/converted/cram_to_bam", + "preprocessing/converted/cram_to_bam/test", + "preprocessing/converted/cram_to_bam/test/test.bam", + "preprocessing/converted/cram_to_bam/test/test.bam.bai", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/bcftools", + "reports/bcftools/bcftools/test", + "reports/bcftools/bcftools/test/test.bcftools.bcftools_stats.txt", + "reports/bcftools/deepvariant", + "reports/bcftools/deepvariant/test", + "reports/bcftools/deepvariant/test/test.deepvariant.bcftools_stats.txt", + "reports/bcftools/freebayes", + "reports/bcftools/freebayes/test", + "reports/bcftools/freebayes/test/test.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/strelka", + "reports/bcftools/strelka/test", + "reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt", + "reports/bcftools/tiddit", + "reports/bcftools/tiddit/test", + "reports/bcftools/tiddit/test/test.tiddit.bcftools_stats.txt", + "reports/fastqc", + "reports/fastqc/test-test_L1", 
+ "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test-test_L2", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.zip", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/bcftools", + "reports/vcftools/bcftools/test", + "reports/vcftools/bcftools/test/test.bcftools.FILTER.summary", + "reports/vcftools/bcftools/test/test.bcftools.TsTv.count", + "reports/vcftools/bcftools/test/test.bcftools.TsTv.qual", + "reports/vcftools/deepvariant", + "reports/vcftools/deepvariant/test", + "reports/vcftools/deepvariant/test/test.deepvariant.FILTER.summary", + "reports/vcftools/deepvariant/test/test.deepvariant.TsTv.count", + "reports/vcftools/deepvariant/test/test.deepvariant.TsTv.qual", + "reports/vcftools/freebayes", + 
"reports/vcftools/freebayes/test", + "reports/vcftools/freebayes/test/test.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.qual", + "reports/vcftools/strelka", + "reports/vcftools/strelka/test", + "reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.count", + "reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual", + "reports/vcftools/tiddit", + "reports/vcftools/tiddit/test", + "reports/vcftools/tiddit/test/test.tiddit.FILTER.summary", + "reports/vcftools/tiddit/test/test.tiddit.TsTv.count", + "reports/vcftools/tiddit/test/test.tiddit.TsTv.qual", + "variant_calling", + "variant_calling/bcftools", + "variant_calling/bcftools/test", + "variant_calling/bcftools/test/test.bcftools.vcf.gz", + "variant_calling/bcftools/test/test.bcftools.vcf.gz.tbi", + "variant_calling/cnvkit", + "variant_calling/cnvkit/test", + "variant_calling/cnvkit/test/genome.antitarget.bed", + "variant_calling/cnvkit/test/genome.target.bed", + "variant_calling/cnvkit/test/reference.cnn", + "variant_calling/cnvkit/test/test-diagram.pdf", + "variant_calling/cnvkit/test/test-scatter.png", + "variant_calling/cnvkit/test/test.antitargetcoverage.cnn", + "variant_calling/cnvkit/test/test.bintest.cns", + "variant_calling/cnvkit/test/test.call.cns", + "variant_calling/cnvkit/test/test.cnr", + "variant_calling/cnvkit/test/test.cns", + "variant_calling/cnvkit/test/test.cnvcall.vcf", + "variant_calling/cnvkit/test/test.genemetrics.tsv", + "variant_calling/cnvkit/test/test.germline.call.cns", + "variant_calling/cnvkit/test/test.targetcoverage.cnn", + "variant_calling/deepvariant", + "variant_calling/deepvariant/test", + "variant_calling/deepvariant/test/test.deepvariant.g.vcf.gz", + "variant_calling/deepvariant/test/test.deepvariant.g.vcf.gz.tbi", + 
"variant_calling/deepvariant/test/test.deepvariant.vcf.gz", + "variant_calling/deepvariant/test/test.deepvariant.vcf.gz.tbi", + "variant_calling/freebayes", + "variant_calling/freebayes/test", + "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/test/test.freebayes.vcf.gz", + "variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi", + "variant_calling/strelka", + "variant_calling/strelka/test", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz", + "variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz", + "variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi", + "variant_calling/tiddit", + "variant_calling/tiddit/test", + "variant_calling/tiddit/test/test.tiddit.ploidies.tab", + "variant_calling/tiddit/test/test.tiddit.vcf.gz", + "variant_calling/tiddit/test/test.tiddit.vcf.gz.tbi" + ], + [ + "fastqc-status-check-heatmap.txt:md5,a020b9689ddeb4abec16b4854fe452f1", + "fastqc_adapter_content_plot.txt:md5,2e1b72be741319e7fadbbb39d7e5b37d", + "fastqc_per_base_n_content_plot.txt:md5,ad3b971a6bb4e8ba6c844c8a03584eb8", + "fastqc_per_base_sequence_quality_plot.txt:md5,1bc03889d243a944253ac637d81ae10c", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,2c42d140ce06c08dad2b58f397c23239", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,59e22821d350bfb97c37ffd9088f5ad9", + "fastqc_per_sequence_quality_scores_plot.txt:md5,f33615cc98bb6225f39545a415fa7c0f", + "fastqc_sequence_counts_plot.txt:md5,7f0f19a58e8e54e792a751fd04a9ae13", + "fastqc_sequence_duplication_levels_plot.txt:md5,92b02e250ff78725deb9a10d510fcecc", + "fastqc_sequence_length_distribution_plot.txt:md5,fb04dce68ec566314125bc9438211b28", + "mosdepth-coverage-per-contig-single.txt:md5,a0990e98cdd2cd5540d07f504f516ccf", + "mosdepth-cumcoverage-dist-id.txt:md5,caee7b9e5d1a451970f87d791c3e450b", + 
"mosdepth_perchrom.txt:md5,a0990e98cdd2cd5540d07f504f516ccf", + "multiqc_citations.txt:md5,ace4ca89138a5f1e2be289c157c00bd9", + "multiqc_fastqc.txt:md5,bde0d0bffa62228b33fb68b7e25b6ff8", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,ae7954084b2cd708c5e7369606bf4208", + "samtools_alignment_plot.txt:md5,438e719bf574a46726dbd2e0f1442e42", + "test.bcftools.bcftools_stats.txt:md5,4b2997064fb500f090639edca1ccaa6e", + "test.deepvariant.bcftools_stats.txt:md5,61ab450ed778088b9c879b8dcbbb9db8", + "test.freebayes.filtered.bcftools_stats.txt:md5,c6b6e221504c69ee75b209f4a0b2506a", + "test.strelka.variants.bcftools_stats.txt:md5,2613827870dd789fe602a8a3b739b7f2", + "test.tiddit.bcftools_stats.txt:md5,2b96446a75e1ec5d5f1310a934c0dd4c", + "test.md.mosdepth.global.dist.txt:md5,ef7c375ae07aec5540f9892b9b556b73", + "test.md.mosdepth.region.dist.txt:md5,212efff2213f6fc1c3204daf68bbb8c8", + "test.md.mosdepth.summary.txt:md5,72114393647ff64503522760218b30f0", + "test.md.regions.bed.gz:md5,985db429051ddcd5eae177da6fb55ad6", + "test.md.regions.bed.gz.csi:md5,3fa0f8272fefafe3cd840376d34a94a2", + "test.recal.mosdepth.global.dist.txt:md5,ef7c375ae07aec5540f9892b9b556b73", + "test.recal.mosdepth.region.dist.txt:md5,212efff2213f6fc1c3204daf68bbb8c8", + "test.recal.mosdepth.summary.txt:md5,72114393647ff64503522760218b30f0", + "test.recal.regions.bed.gz:md5,985db429051ddcd5eae177da6fb55ad6", + "test.recal.regions.bed.gz.csi:md5,3fa0f8272fefafe3cd840376d34a94a2", + "test.bcftools.FILTER.summary:md5,2c7c195b18f335698bae9619280492e8", + "test.bcftools.TsTv.count:md5,9748f609e7b83855e75804191c935ab3", + "test.deepvariant.FILTER.summary:md5,4908d916a23384a3b253d17b2ae784d7", + 
"test.deepvariant.TsTv.count:md5,d3c138521fd95eb45f6e495f39d47404", + "test.freebayes.filtered.FILTER.summary:md5,9ae0931339e231f90a4b5c330f7f6d55", + "test.freebayes.filtered.TsTv.count:md5,845f64e5bb4224af98f3a47294cd5483", + "test.strelka.variants.FILTER.summary:md5,dd87f507da7de20d5318841af312493b", + "test.strelka.variants.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + "test.tiddit.FILTER.summary:md5,2cb5598e2a83870e162787c5025c9518", + "test.tiddit.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + "genome.antitarget.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.target.bed:md5,ab3aafe8cc4cc3f1c40d527dfad64fda", + "reference.cnn:md5,7635ac4a5aff1502b21155f5e75dbe54", + "test.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test.bintest.cns:md5,a7417c1201ab6e748ad04563b3a5d51a", + "test.call.cns:md5,d4864f02138dc408c4a56f3d04820b03", + "test.cnr:md5,4e6b6b8c72444ca13ec3149c03a86ddf", + "test.cns:md5,1c8fb73f3084ed202a851b1f757a0faa", + "test.genemetrics.tsv:md5,5ec3555520f502f00f551ae7900a3824", + "test.germline.call.cns:md5,41095573157b8ee02082cad2327ea7b6", + "test.targetcoverage.cnn:md5,47c4b06de7ccdf854a836da493889ab0" + ], + [ + "test.bam:md5,dbd6f40b1e6d72501dc034e62e9d54eb" + ], + [ + "test.md.cram:md5,724c601c9daf019d356a53a7d5e1c8b1", + "test.recal.cram:md5,dbd6f40b1e6d72501dc034e62e9d54eb" + ], + [ + [ + "test.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=738, phased=false, phasedAutodetect=false]" + ] + ], + [ + "test.bcftools.vcf.gz:md5,f627b06200401f34e124810ec89fde7c", + "test.cnvcall.vcf:md5,ad603307280549c2053c2e3b57141c0a", + "test.deepvariant.g.vcf.gz:md5,c29fb457bb32ab95580d45e20048fc25", + "test.deepvariant.vcf.gz:md5,e6f8bbd0bc7a977f3af578e347f2cce6", + "test.freebayes.filtered.vcf.gz:md5,bf085c88aa26191a55fbd23bff6a498f", + "test.strelka.genome.vcf.gz:md5,16437a040679d88b7d84a9276f793d6c", + "test.strelka.variants.vcf.gz:md5,666f835fdaf4952a179cdedd40c9d565", + 
"test.tiddit.vcf.gz:md5,ac44cc2f44ebec84f5377e3274131876" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-08T12:49:19.475669979" + }, + "-profile test --input tests/csv/3.0/fastq_tumor_only.csv --tools cnvkit,freebayes,mpileup,mutect2,tiddit": { + "content": [ + 49, + { + "BCFTOOLS_MPILEUP": { + "bcftools": 1.21 + }, + "BCFTOOLS_SORT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CALCULATECONTAMINATION": { + "gatk4": "4.6.1.0" + }, + "CNVKIT_ANTITARGET": { + "cnvkit": "0.9.11" + }, + "CNVKIT_BATCH": { + "cnvkit": "0.9.10" + }, + "CNVKIT_REFERENCE": { + "cnvkit": "0.9.11" + }, + "CRAM_TO_BAM": { + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FILTERMUTECTCALLS": { + "gatk4": "4.6.1.0" + }, + "FREEBAYES": { + "freebayes": "1.3.10" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "GETPILEUPSUMMARIES": { + "gatk4": "4.6.1.0" + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "LEARNREADORIENTATIONMODEL": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "MUTECT2": { + "gatk4": "4.6.1.0" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIP_TIDDIT_SV": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_VC_FREEBAYES": { + "tabix": 1.21 + }, 
+ "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, + "TIDDIT_SV": { + "tiddit": "3.6.1" + }, + "VCFLIB_VCFFILTER": { + "vcflib": "1.0.14" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + 
"multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + 
"multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", 
+ "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + 
"multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + 
"multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/converted", + "preprocessing/converted/cram_to_bam", + "preprocessing/converted/cram_to_bam/test2", + "preprocessing/converted/cram_to_bam/test2/test2.bam", + "preprocessing/converted/cram_to_bam/test2/test2.bam.bai", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test2", + "preprocessing/markduplicates/test2/test2.md.cram", + "preprocessing/markduplicates/test2/test2.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test2", + "preprocessing/recal_table/test2/test2.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test2", + "preprocessing/recalibrated/test2/test2.recal.cram", + "preprocessing/recalibrated/test2/test2.recal.cram.crai", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/bcftools", + "reports/bcftools/bcftools/test2", + "reports/bcftools/bcftools/test2/test2.bcftools.bcftools_stats.txt", + "reports/bcftools/freebayes", + "reports/bcftools/freebayes/test2", + "reports/bcftools/freebayes/test2/test2.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/mutect2", + "reports/bcftools/mutect2/test2", + "reports/bcftools/mutect2/test2/test2.mutect2.filtered.bcftools_stats.txt", + "reports/bcftools/tiddit", + "reports/bcftools/tiddit/test2", + "reports/bcftools/tiddit/test2/test2.tiddit.bcftools_stats.txt", + "reports/fastqc", + "reports/fastqc/test2-test_L1", + "reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.html", + 
"reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.zip", + "reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.html", + "reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test2", + "reports/markduplicates/test2/test2.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test2", + "reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt", + "reports/mosdepth/test2/test2.md.mosdepth.region.dist.txt", + "reports/mosdepth/test2/test2.md.mosdepth.summary.txt", + "reports/mosdepth/test2/test2.md.regions.bed.gz", + "reports/mosdepth/test2/test2.md.regions.bed.gz.csi", + "reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test2/test2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test2/test2.recal.mosdepth.summary.txt", + "reports/mosdepth/test2/test2.recal.regions.bed.gz", + "reports/mosdepth/test2/test2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test2", + "reports/samtools/test2/test2.md.cram.stats", + "reports/samtools/test2/test2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/bcftools", + "reports/vcftools/bcftools/test2", + "reports/vcftools/bcftools/test2/test2.bcftools.FILTER.summary", + "reports/vcftools/bcftools/test2/test2.bcftools.TsTv.count", + "reports/vcftools/bcftools/test2/test2.bcftools.TsTv.qual", + "reports/vcftools/freebayes", + "reports/vcftools/freebayes/test2", + "reports/vcftools/freebayes/test2/test2.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/test2/test2.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/test2/test2.freebayes.filtered.TsTv.qual", + "reports/vcftools/mutect2", + "reports/vcftools/mutect2/test2", + "reports/vcftools/mutect2/test2/test2.mutect2.filtered.FILTER.summary", + "reports/vcftools/mutect2/test2/test2.mutect2.filtered.TsTv.count", + "reports/vcftools/mutect2/test2/test2.mutect2.filtered.TsTv.qual", + "reports/vcftools/tiddit", + 
"reports/vcftools/tiddit/test2", + "reports/vcftools/tiddit/test2/test2.tiddit.FILTER.summary", + "reports/vcftools/tiddit/test2/test2.tiddit.TsTv.count", + "reports/vcftools/tiddit/test2/test2.tiddit.TsTv.qual", + "variant_calling", + "variant_calling/bcftools", + "variant_calling/bcftools/test2", + "variant_calling/bcftools/test2/test2.bcftools.vcf.gz", + "variant_calling/bcftools/test2/test2.bcftools.vcf.gz.tbi", + "variant_calling/cnvkit", + "variant_calling/cnvkit/test2", + "variant_calling/cnvkit/test2/cnvkit.reference.antitarget-tmp.bed", + "variant_calling/cnvkit/test2/cnvkit.reference.target-tmp.bed", + "variant_calling/cnvkit/test2/test2.antitargetcoverage.cnn", + "variant_calling/cnvkit/test2/test2.targetcoverage.cnn", + "variant_calling/freebayes", + "variant_calling/freebayes/test2", + "variant_calling/freebayes/test2/test2.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/test2/test2.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/test2/test2.freebayes.vcf.gz", + "variant_calling/freebayes/test2/test2.freebayes.vcf.gz.tbi", + "variant_calling/mutect2", + "variant_calling/mutect2/test2", + "variant_calling/mutect2/test2/test2.mutect2.artifactprior.tar.gz", + "variant_calling/mutect2/test2/test2.mutect2.contamination.table", + "variant_calling/mutect2/test2/test2.mutect2.filtered.vcf.gz", + "variant_calling/mutect2/test2/test2.mutect2.filtered.vcf.gz.filteringStats.tsv", + "variant_calling/mutect2/test2/test2.mutect2.filtered.vcf.gz.tbi", + "variant_calling/mutect2/test2/test2.mutect2.pileups.table", + "variant_calling/mutect2/test2/test2.mutect2.segmentation.table", + "variant_calling/mutect2/test2/test2.mutect2.vcf.gz", + "variant_calling/mutect2/test2/test2.mutect2.vcf.gz.stats", + "variant_calling/mutect2/test2/test2.mutect2.vcf.gz.tbi", + "variant_calling/tiddit", + "variant_calling/tiddit/test2", + "variant_calling/tiddit/test2/test2.tiddit.ploidies.tab", + "variant_calling/tiddit/test2/test2.tiddit.vcf.gz", + 
"variant_calling/tiddit/test2/test2.tiddit.vcf.gz.tbi" + ], + [ + "fastqc-status-check-heatmap.txt:md5,b82724a03a6ec93ee44fe855f34025d7", + "fastqc_adapter_content_plot.txt:md5,aacb772b9a0141acc4ab43287172d6f4", + "fastqc_per_base_n_content_plot.txt:md5,31bbffa022c118bc9a3ac18973b54775", + "fastqc_per_base_sequence_quality_plot.txt:md5,1ac8402bd8a31f6002ecf1882cc9639e", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,f57d340bbd9ca796a0b336bae53aa4d4", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,1aaa2f08d367f35108c469d6cdc72773", + "fastqc_per_sequence_quality_scores_plot.txt:md5,e56fb8c4dc6b366afd58be19058e7118", + "fastqc_sequence_counts_plot.txt:md5,e96ed3d75a347dfe3c36a4055ea5a7f6", + "fastqc_sequence_duplication_levels_plot.txt:md5,90fc887e4afd33c3a72200bd854de2a5", + "fastqc_sequence_length_distribution_plot.txt:md5,6373d807db4406a7c077df2262ec6e26", + "mosdepth-coverage-per-contig-single.txt:md5,2b508063b507f1c8b2e852e2dac7f0da", + "mosdepth-cumcoverage-dist-id.txt:md5,7764facd9f5cb14aa9a00aa35ede15ca", + "mosdepth_perchrom.txt:md5,2b508063b507f1c8b2e852e2dac7f0da", + "multiqc_citations.txt:md5,ace4ca89138a5f1e2be289c157c00bd9", + "multiqc_fastqc.txt:md5,d54614f8b2de16511949dbbde8f38a5d", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,157a95de8aa4dd8947bb965f0c9ae3c7", + "samtools_alignment_plot.txt:md5,46e8824724863d243a01cc239a5eff15", + "test2.bcftools.bcftools_stats.txt:md5,c61cda67fa314ecdeac858599ce732ee", + "test2.freebayes.filtered.bcftools_stats.txt:md5,57082cdcbbe9ffb4c63b314912227afb", + "test2.mutect2.filtered.bcftools_stats.txt:md5,3ecb2b3bb668d9f808787ffaa780fc03", + 
"test2.tiddit.bcftools_stats.txt:md5,1122042610f0aed654c8d8b817fd1c50", + "test2.md.mosdepth.global.dist.txt:md5,2020cf6dfc7ddca020c921dd9f0549b7", + "test2.md.mosdepth.region.dist.txt:md5,38ff8b38c33b9231f047fea8ea830aae", + "test2.md.mosdepth.summary.txt:md5,8b991358768cade225470a07cd34f573", + "test2.md.regions.bed.gz:md5,08e767f91a0a8d82733f0040e804a85f", + "test2.md.regions.bed.gz.csi:md5,d5f1c9389ecf52ba839e834780a94549", + "test2.recal.mosdepth.global.dist.txt:md5,2020cf6dfc7ddca020c921dd9f0549b7", + "test2.recal.mosdepth.region.dist.txt:md5,38ff8b38c33b9231f047fea8ea830aae", + "test2.recal.mosdepth.summary.txt:md5,8b991358768cade225470a07cd34f573", + "test2.recal.regions.bed.gz:md5,08e767f91a0a8d82733f0040e804a85f", + "test2.recal.regions.bed.gz.csi:md5,d5f1c9389ecf52ba839e834780a94549", + "test2.bcftools.FILTER.summary:md5,2a717afe607d6b59558f860518738542", + "test2.bcftools.TsTv.count:md5,0c9a9764d605cba41d6c088340688939", + "test2.freebayes.filtered.FILTER.summary:md5,24e14f2e2651745a8d74dfe2844afa24", + "test2.freebayes.filtered.TsTv.count:md5,60b173b4a649483b651fcfedf1f5d790", + "test2.mutect2.filtered.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "test2.mutect2.filtered.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + "test2.tiddit.FILTER.summary:md5,cea83f893b7e8a5744bde7c54486013a", + "test2.tiddit.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + "cnvkit.reference.antitarget-tmp.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "cnvkit.reference.target-tmp.bed:md5,14a7ba28453f8c8fc6ba5b044c517291", + "test2.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test2.targetcoverage.cnn:md5,a6b7efce89e19440e95138886b371066", + "test2.mutect2.contamination.table:md5,9c5a4666d682b84be6bf5ecf4305b15c", + "test2.mutect2.filtered.vcf.gz.filteringStats.tsv:md5,98e1b87a52999eb8f429ef4a7877eb3f", + "test2.mutect2.pileups.table:md5,a4069de22dd8deaeea222115f2738713", + 
"test2.mutect2.segmentation.table:md5,abed02f7a49bca992eb4e6f0006df85b", + "test2.mutect2.vcf.gz.stats:md5,c129c424770070e0218d3ab66f85ac82" + ], + [ + "test2.bam:md5,f4205ab086600ba2927e1468dc732976" + ], + [ + "test2.md.cram:md5,bac87cf9290577fd9a4def63e046031f", + "test2.recal.cram:md5,f4205ab086600ba2927e1468dc732976" + ], + [ + "test2.bcftools.vcf.gz:md5,43478c30492f2a1caab22047cf1ad632", + "test2.freebayes.filtered.vcf.gz:md5,8461ef8ccf651775bde7c29d0e563474", + "test2.mutect2.filtered.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "test2.mutect2.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "test2.tiddit.vcf.gz:md5,a4a162ddf9a49df61c62abbf704e2d19" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998513#1/1", + "WARN: No Panel-of-normal was specified for Mutect2." + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-08T14:32:16.644778218" + } +} diff --git a/tests/variant_calling_ascat.nf.test b/tests/variant_calling_ascat.nf.test new file mode 100644 index 0000000000..1bf336d040 --- /dev/null +++ b/tests/variant_calling_ascat.nf.test @@ -0,0 +1,82 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools ascat --input ascat_somatic.csv", + params: [ + fasta : modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22/sequence/chr22_23800000-23980000.fa', + ascat_alleles : modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_alleles_hg38_chr22.txt', + ascat_genome : "hg38", + ascat_loci : modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_loci_hg38_chr22.txt', + ascat_loci_gc : null, + ascat_loci_rt : null, + input : 
"${projectDir}/tests/csv/3.0/ascat_somatic.csv", + step : "variant_calling", + tools : 'ascat', + no_intervals : true, + validate_params : false + ] + ], + [ + name: "-profile test --tools ascat --input ascat_somatic.csv --only_paired_variant_calling including gc and rt", + params: [ + fasta : modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22/sequence/chr22_23800000-23980000.fa', + ascat_alleles : modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_alleles_hg38_chr22.txt', + ascat_genome : "hg38", + ascat_loci : modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_loci_hg38_chr22.txt', + ascat_loci_gc : modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/GC_G1000_hg38_22.txt', + ascat_loci_rt : modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/RT_G1000_hg38_22.txt', + input : "${projectDir}/tests/csv/3.0/ascat_somatic.csv", + step : "variant_calling", + tools : 'ascat', + no_intervals : true, + validate_params : false + ] + ], + [ + name: "-profile test --tools ascat --input ascat_somatic.csv -stub", + params: [ + fasta : modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22/sequence/chr22_23800000-23980000.fa', + ascat_alleles : modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_alleles_hg38_chr22.txt', + ascat_genome : "hg38", + ascat_loci : modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_loci_hg38_chr22.txt', + ascat_loci_gc : null, + ascat_loci_rt : null, + input : "${projectDir}/tests/csv/3.0/ascat_somatic.csv", + step : "variant_calling", + tools : 'ascat', + no_intervals : true, + validate_params : false + ], + stub: true + ], + [ + name: "-profile test --tools ascat --input ascat_somatic.csv --only_paired_variant_calling -stub", + params: [ + fasta : modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22/sequence/chr22_23800000-23980000.fa', + ascat_alleles : modules_testdata_base_path + 
'genomics/homo_sapiens/genome/ascat/G1000_alleles_hg38_chr22.txt', + ascat_genome : "hg38", + ascat_loci : modules_testdata_base_path + 'genomics/homo_sapiens/genome/ascat/G1000_loci_hg38_chr22.txt', + ascat_loci_gc : null, + ascat_loci_rt : null, + input : "${projectDir}/tests/csv/3.0/ascat_somatic.csv", + step : "variant_calling", + tools : 'ascat', + no_intervals : true, + only_paired_variant_calling : true, + validate_params : false + ], + stub: true + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_ascat.nf.test.snap b/tests/variant_calling_ascat.nf.test.snap new file mode 100644 index 0000000000..f62fa027a4 --- /dev/null +++ b/tests/variant_calling_ascat.nf.test.snap @@ -0,0 +1,480 @@ +{ + "-profile test --tools ascat --input ascat_somatic.csv --only_paired_variant_calling -stub": { + "content": [ + 7, + { + "ASCAT": { + "alleleCounter": "4.3.0", + "bioconductor-ascat": "3.2.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/.stub", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/NA12878", + "reports/mosdepth/NA12878/NA12878.recal.global.dist.txt", + "reports/mosdepth/NA12878/NA12878.recal.per-base.bed.gz", + "reports/mosdepth/NA12878/NA12878.recal.per-base.bed.gz.csi", + "reports/mosdepth/NA12878/NA12878.recal.per-base.d4", + "reports/mosdepth/NA12878/NA12878.recal.quantized.bed.gz", + "reports/mosdepth/NA12878/NA12878.recal.quantized.bed.gz.csi", + "reports/mosdepth/NA12878/NA12878.recal.region.dist.txt", + 
"reports/mosdepth/NA12878/NA12878.recal.regions.bed.gz", + "reports/mosdepth/NA12878/NA12878.recal.regions.bed.gz.csi", + "reports/mosdepth/NA12878/NA12878.recal.summary.txt", + "reports/mosdepth/NA12878/NA12878.recal.thresholds.bed.gz", + "reports/mosdepth/NA12878/NA12878.recal.thresholds.bed.gz.csi", + "reports/mosdepth/NA12878_1X", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.global.dist.txt", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.per-base.bed.gz", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.per-base.bed.gz.csi", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.per-base.d4", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.quantized.bed.gz", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.quantized.bed.gz.csi", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.region.dist.txt", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.regions.bed.gz", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.regions.bed.gz.csi", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.summary.txt", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.thresholds.bed.gz", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.thresholds.bed.gz.csi", + "reports/samtools", + "reports/samtools/NA12878", + "reports/samtools/NA12878/NA12878.recal.cram.stats", + "reports/samtools/NA12878_1X", + "reports/samtools/NA12878_1X/NA12878_1X.recal.cram.stats", + "variant_calling", + "variant_calling/ascat", + "variant_calling/ascat/NA12878_1X_vs_NA12878", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.after_correction.gc_rt.test.tumour.germline.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.after_correction.gc_rt.test.tumour.tumour.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.before_correction.test.tumour.germline.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.before_correction.test.tumour.tumour.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.cnvs.txt", + 
"variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.metrics.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.purityploidy.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.segments.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour.ASPCF.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour.sunrise.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_normalBAF.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_normalLogR.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_tumourBAF.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_tumourLogR.txt" + ], + [ + "WARN: No LogRCorrection performed in ASCAT. For LogRCorrection to run, please provide either loci gc files or both loci gc files and loci rt files.", + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." 
+ ] + ], + "timestamp": "2026-04-08T12:14:36.857806635", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.2" + } + }, + "-profile test --tools ascat --input ascat_somatic.csv -stub": { + "content": [ + 7, + { + "ASCAT": { + "alleleCounter": "4.3.0", + "bioconductor-ascat": "3.2.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/.stub", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/NA12878", + "reports/mosdepth/NA12878/NA12878.recal.global.dist.txt", + "reports/mosdepth/NA12878/NA12878.recal.per-base.bed.gz", + "reports/mosdepth/NA12878/NA12878.recal.per-base.bed.gz.csi", + "reports/mosdepth/NA12878/NA12878.recal.per-base.d4", + "reports/mosdepth/NA12878/NA12878.recal.quantized.bed.gz", + "reports/mosdepth/NA12878/NA12878.recal.quantized.bed.gz.csi", + "reports/mosdepth/NA12878/NA12878.recal.region.dist.txt", + "reports/mosdepth/NA12878/NA12878.recal.regions.bed.gz", + "reports/mosdepth/NA12878/NA12878.recal.regions.bed.gz.csi", + "reports/mosdepth/NA12878/NA12878.recal.summary.txt", + "reports/mosdepth/NA12878/NA12878.recal.thresholds.bed.gz", + "reports/mosdepth/NA12878/NA12878.recal.thresholds.bed.gz.csi", + "reports/mosdepth/NA12878_1X", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.global.dist.txt", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.per-base.bed.gz", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.per-base.bed.gz.csi", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.per-base.d4", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.quantized.bed.gz", + 
"reports/mosdepth/NA12878_1X/NA12878_1X.recal.quantized.bed.gz.csi", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.region.dist.txt", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.regions.bed.gz", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.regions.bed.gz.csi", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.summary.txt", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.thresholds.bed.gz", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.thresholds.bed.gz.csi", + "reports/samtools", + "reports/samtools/NA12878", + "reports/samtools/NA12878/NA12878.recal.cram.stats", + "reports/samtools/NA12878_1X", + "reports/samtools/NA12878_1X/NA12878_1X.recal.cram.stats", + "variant_calling", + "variant_calling/ascat", + "variant_calling/ascat/NA12878_1X_vs_NA12878", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.after_correction.gc_rt.test.tumour.germline.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.after_correction.gc_rt.test.tumour.tumour.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.before_correction.test.tumour.germline.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.before_correction.test.tumour.tumour.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.cnvs.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.metrics.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.purityploidy.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.segments.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour.ASPCF.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour.sunrise.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_normalBAF.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_normalLogR.txt", + 
"variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_tumourBAF.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_tumourLogR.txt" + ], + [ + "WARN: No LogRCorrection performed in ASCAT. For LogRCorrection to run, please provide either loci gc files or both loci gc files and loci rt files.", + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." + ] + ], + "timestamp": "2026-04-08T12:14:00.856421629", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.2" + } + }, + "-profile test --tools ascat --input ascat_somatic.csv --only_paired_variant_calling including gc and rt": { + "content": [ + 7, + { + "ASCAT": { + "alleleCounter": "4.3.0", + "ascat": "3.2.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/NA12878", + "reports/mosdepth/NA12878/NA12878.recal.mosdepth.global.dist.txt", + "reports/mosdepth/NA12878/NA12878.recal.mosdepth.region.dist.txt", + "reports/mosdepth/NA12878/NA12878.recal.mosdepth.summary.txt", + "reports/mosdepth/NA12878/NA12878.recal.regions.bed.gz", + "reports/mosdepth/NA12878/NA12878.recal.regions.bed.gz.csi", + "reports/mosdepth/NA12878_1X", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.mosdepth.global.dist.txt", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.mosdepth.region.dist.txt", + 
"reports/mosdepth/NA12878_1X/NA12878_1X.recal.mosdepth.summary.txt", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.regions.bed.gz", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/NA12878", + "reports/samtools/NA12878/NA12878.recal.cram.stats", + "reports/samtools/NA12878_1X", + "reports/samtools/NA12878_1X/NA12878_1X.recal.cram.stats", + "variant_calling", + "variant_calling/ascat", + "variant_calling/ascat/NA12878_1X_vs_NA12878", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.after_correction_gc_rt.NA12878_1X_vs_NA12878.tumour.germline.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.after_correction_gc_rt.NA12878_1X_vs_NA12878.tumour.tumour.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.before_correction.NA12878_1X_vs_NA12878.tumour.germline.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.before_correction.NA12878_1X_vs_NA12878.tumour.tumour.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.cnvs.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.metrics.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.purityploidy.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.segments.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour.ASPCF.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour.sunrise.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_normalBAF.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_normalBAF_rawBAF.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_normalLogR.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_tumourBAF.txt", + 
"variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_tumourBAF_rawBAF.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_tumourLogR.txt" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,a79d8d9cb20cf117f7510eb1e3674997", + "mosdepth-cumcoverage-dist-id.txt:md5,c5095d8ac96d906979d5ead383b14d73", + "mosdepth_perchrom.txt:md5,a79d8d9cb20cf117f7510eb1e3674997", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "samtools-stats-dp.txt:md5,272184ca54d87094df9ab12335618e9a", + "samtools_alignment_plot.txt:md5,cd6531ba39758cf6ad96ba9d83df2230", + "NA12878.recal.mosdepth.global.dist.txt:md5,a5ccac0290f8331d0aa7d68f0c9b0934", + "NA12878.recal.mosdepth.region.dist.txt:md5,81a999e01ee353b6a3dd5db321053ffc", + "NA12878.recal.mosdepth.summary.txt:md5,f78fade2ac186184fc44eb1d7ec443d0", + "NA12878.recal.regions.bed.gz:md5,2870532cc9d041c21999cb0a14d453b5", + "NA12878.recal.regions.bed.gz.csi:md5,58b5cd4cb9ab131a0f0a601eb72bcd04", + "NA12878_1X.recal.mosdepth.global.dist.txt:md5,8c70c08460d008b30c231a2d00fc3fb9", + "NA12878_1X.recal.mosdepth.region.dist.txt:md5,ddfe117ae7b0474c1e27857a282da9fb", + "NA12878_1X.recal.mosdepth.summary.txt:md5,8056360df7642e84fcc4355454cf617a", + "NA12878_1X.recal.regions.bed.gz:md5,0e6ff7afa2678e2dc667ec650bb18f75", + "NA12878_1X.recal.regions.bed.gz.csi:md5,5d07e60555f3087f5805dd20ed3bea8b", + "NA12878_1X_vs_NA12878.cnvs.txt:md5,68b329da9893e34099c7d8ad5cb9c940", + "NA12878_1X_vs_NA12878.purityploidy.txt:md5,f1484c2b120834d3db8774ad02a038b9", + "NA12878_1X_vs_NA12878.segments.txt:md5,68b329da9893e34099c7d8ad5cb9c940", + "NA12878_1X_vs_NA12878.tumour_normalBAF.txt:md5,44e9d560446d4f3f998b5b0ba48a2358", + "NA12878_1X_vs_NA12878.tumour_normalBAF_rawBAF.txt:md5,09e38bc59d2eb9b63bc055b145ea329e", + "NA12878_1X_vs_NA12878.tumour_normalLogR.txt:md5,b5f37e77157d2ced768a52356a3bcb79", + "NA12878_1X_vs_NA12878.tumour_tumourBAF.txt:md5,90a2d5db5b8729b76aa113f8533738fc", + 
"NA12878_1X_vs_NA12878.tumour_tumourBAF_rawBAF.txt:md5,3f9320be796f6144f2ce9b7fac0625c3", + "NA12878_1X_vs_NA12878.tumour_tumourLogR.txt:md5,d8654a8fa37553f87232b2bd4a184dbc" + ], + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings" + ], + "timestamp": "2025-09-30T22:09:59.120270404", + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + } + }, + "-profile test --tools ascat --input ascat_somatic.csv": { + "content": [ + 7, + { + "ASCAT": { + "alleleCounter": "4.3.0", + "ascat": "3.2.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + 
"multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/NA12878", + "reports/mosdepth/NA12878/NA12878.recal.mosdepth.global.dist.txt", + "reports/mosdepth/NA12878/NA12878.recal.mosdepth.region.dist.txt", + "reports/mosdepth/NA12878/NA12878.recal.mosdepth.summary.txt", + "reports/mosdepth/NA12878/NA12878.recal.regions.bed.gz", + "reports/mosdepth/NA12878/NA12878.recal.regions.bed.gz.csi", + "reports/mosdepth/NA12878_1X", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.mosdepth.global.dist.txt", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.mosdepth.region.dist.txt", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.mosdepth.summary.txt", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.regions.bed.gz", + "reports/mosdepth/NA12878_1X/NA12878_1X.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/NA12878", + "reports/samtools/NA12878/NA12878.recal.cram.stats", + "reports/samtools/NA12878_1X", + 
"reports/samtools/NA12878_1X/NA12878_1X.recal.cram.stats", + "variant_calling", + "variant_calling/ascat", + "variant_calling/ascat/NA12878_1X_vs_NA12878", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.before_correction.NA12878_1X_vs_NA12878.tumour.germline.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.before_correction.NA12878_1X_vs_NA12878.tumour.tumour.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.cnvs.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.metrics.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.purityploidy.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.segments.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour.ASPCF.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour.sunrise.png", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_normalBAF.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_normalBAF_rawBAF.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_normalLogR.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_tumourBAF.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_tumourBAF_rawBAF.txt", + "variant_calling/ascat/NA12878_1X_vs_NA12878/NA12878_1X_vs_NA12878.tumour_tumourLogR.txt" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,a79d8d9cb20cf117f7510eb1e3674997", + "mosdepth-cumcoverage-dist-id.txt:md5,c5095d8ac96d906979d5ead383b14d73", + "mosdepth_perchrom.txt:md5,a79d8d9cb20cf117f7510eb1e3674997", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "samtools-stats-dp.txt:md5,272184ca54d87094df9ab12335618e9a", + "samtools_alignment_plot.txt:md5,cd6531ba39758cf6ad96ba9d83df2230", + 
"NA12878.recal.mosdepth.global.dist.txt:md5,a5ccac0290f8331d0aa7d68f0c9b0934", + "NA12878.recal.mosdepth.region.dist.txt:md5,81a999e01ee353b6a3dd5db321053ffc", + "NA12878.recal.mosdepth.summary.txt:md5,f78fade2ac186184fc44eb1d7ec443d0", + "NA12878.recal.regions.bed.gz:md5,2870532cc9d041c21999cb0a14d453b5", + "NA12878.recal.regions.bed.gz.csi:md5,58b5cd4cb9ab131a0f0a601eb72bcd04", + "NA12878_1X.recal.mosdepth.global.dist.txt:md5,8c70c08460d008b30c231a2d00fc3fb9", + "NA12878_1X.recal.mosdepth.region.dist.txt:md5,ddfe117ae7b0474c1e27857a282da9fb", + "NA12878_1X.recal.mosdepth.summary.txt:md5,8056360df7642e84fcc4355454cf617a", + "NA12878_1X.recal.regions.bed.gz:md5,0e6ff7afa2678e2dc667ec650bb18f75", + "NA12878_1X.recal.regions.bed.gz.csi:md5,5d07e60555f3087f5805dd20ed3bea8b", + "NA12878_1X_vs_NA12878.cnvs.txt:md5,68b329da9893e34099c7d8ad5cb9c940", + "NA12878_1X_vs_NA12878.purityploidy.txt:md5,f1484c2b120834d3db8774ad02a038b9", + "NA12878_1X_vs_NA12878.segments.txt:md5,68b329da9893e34099c7d8ad5cb9c940", + "NA12878_1X_vs_NA12878.tumour_normalBAF.txt:md5,44e9d560446d4f3f998b5b0ba48a2358", + "NA12878_1X_vs_NA12878.tumour_normalBAF_rawBAF.txt:md5,09e38bc59d2eb9b63bc055b145ea329e", + "NA12878_1X_vs_NA12878.tumour_normalLogR.txt:md5,b5f37e77157d2ced768a52356a3bcb79", + "NA12878_1X_vs_NA12878.tumour_tumourBAF.txt:md5,90a2d5db5b8729b76aa113f8533738fc", + "NA12878_1X_vs_NA12878.tumour_tumourBAF_rawBAF.txt:md5,3f9320be796f6144f2ce9b7fac0625c3", + "NA12878_1X_vs_NA12878.tumour_tumourLogR.txt:md5,d8654a8fa37553f87232b2bd4a184dbc" + ], + "No BAM files", + "No CRAM files", + "No VCF files", + [ + "WARN: No LogRCorrection performed in ASCAT. For LogRCorrection to run, please provide either loci gc files or both loci gc files and loci rt files." 
+ ] + ], + "timestamp": "2025-12-15T22:00:03.974713999", + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + } + } +} \ No newline at end of file diff --git a/tests/variant_calling_cnvkit.nf.test b/tests/variant_calling_cnvkit.nf.test new file mode 100644 index 0000000000..baca3f96ab --- /dev/null +++ b/tests/variant_calling_cnvkit.nf.test @@ -0,0 +1,82 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools cnvkit --input recalibrated_germline.csv", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_germline.csv", + step: "variant_calling", + tools: 'cnvkit' + ] + ], + [ + name: "-profile test --tools cnvkit --input recalibrated_somatic.csv", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + step: "variant_calling", + tools: 'cnvkit' + ] + ], + [ + name: "-profile test --tools cnvkit --input recalibrated_tumoronly.csv", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 
'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_tumoronly.csv", + step: "variant_calling", + tools: 'cnvkit' + ] + ], + [ + name: "-profile test --tools cnvkit --input recalibrated.csv --only_paired_variant_calling", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated.csv", + step: "variant_calling", + tools: 'cnvkit', + only_paired_variant_calling: true + ] + ], + [ + name: "-profile test --tools cnvkit --input recalibrated_somatic.csv --no_intervals", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + step: "variant_calling", + tools: 'cnvkit', + no_intervals: true + ] + ], + [ + name: "-profile test --tools cnvkit --input recalibrated_tumoronly.csv --no_intervals", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: "${projectDir}/tests/csv/3.0/recalibrated_tumoronly.csv", + step: "variant_calling", + tools: 'cnvkit', + no_intervals: true + ] + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_cnvkit.nf.test.snap b/tests/variant_calling_cnvkit.nf.test.snap new file mode 100644 index 0000000000..efb1a587cc --- /dev/null +++ b/tests/variant_calling_cnvkit.nf.test.snap @@ -0,0 +1,1127 @@ +{ + 
"-profile test --tools cnvkit --input recalibrated_somatic.csv": { + "content": [ + 20, + { + "CNVKIT_ANTITARGET": { + "cnvkit": "0.9.11" + }, + "CNVKIT_BATCH": { + "cnvkit": "0.9.10", + "samtools": 1.17 + }, + "CNVKIT_CALL": { + "cnvkit": "0.9.10" + }, + "CNVKIT_EXPORT": { + "cnvkit": "0.9.10" + }, + "CNVKIT_GENEMETRICS": { + "cnvkit": "0.9.10" + }, + "CNVKIT_REFERENCE": { + "cnvkit": "0.9.11" + }, + "CRAM_TO_BAM": { + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/converted", + "preprocessing/converted/cram_to_bam", + "preprocessing/converted/cram_to_bam/sample3", + "preprocessing/converted/cram_to_bam/sample3/sample3.bam", + "preprocessing/converted/cram_to_bam/sample3/sample3.bam.bai", + "preprocessing/converted/cram_to_bam/sample4", + "preprocessing/converted/cram_to_bam/sample4/sample4.bam", + "preprocessing/converted/cram_to_bam/sample4/sample4.bam.bai", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + 
"reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "variant_calling", + "variant_calling/cnvkit", + "variant_calling/cnvkit/sample3", + "variant_calling/cnvkit/sample3/multi_intervals.antitarget.bed", + "variant_calling/cnvkit/sample3/multi_intervals.target.bed", + "variant_calling/cnvkit/sample3/reference.cnn", + "variant_calling/cnvkit/sample3/sample3.cnvcall.vcf", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted-diagram.pdf", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted-scatter.png", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.antitargetcoverage.cnn", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.bintest.cns", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.call.cns", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.cnr", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.cns", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.genemetrics.tsv", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.germline.call.cns", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.targetcoverage.cnn", + "variant_calling/cnvkit/sample4_vs_sample3", + "variant_calling/cnvkit/sample4_vs_sample3/multi_intervals.antitarget.bed", + "variant_calling/cnvkit/sample4_vs_sample3/multi_intervals.target.bed", + "variant_calling/cnvkit/sample4_vs_sample3/reference.cnn", + "variant_calling/cnvkit/sample4_vs_sample3/sample3.antitargetcoverage.cnn", + 
"variant_calling/cnvkit/sample4_vs_sample3/sample3.targetcoverage.cnn", + "variant_calling/cnvkit/sample4_vs_sample3/sample4-diagram.pdf", + "variant_calling/cnvkit/sample4_vs_sample3/sample4-scatter.png", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.antitargetcoverage.cnn", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.bintest.cns", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.call.cns", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.cnr", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.cns", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.genemetrics.tsv", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.somatic.call.cns", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.targetcoverage.cnn", + "variant_calling/cnvkit/sample4_vs_sample3/sample4_vs_sample3.cnvcall.vcf" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,336d786b273c4d4e714d51c44207ff56", + "mosdepth-cumcoverage-dist-id.txt:md5,0cc87c596a0fc30ebb676c4587e986de", + "mosdepth_perchrom.txt:md5,336d786b273c4d4e714d51c44207ff56", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "samtools-stats-dp.txt:md5,e0a8d8867064083908a8ca1ea782d7ac", + "samtools_alignment_plot.txt:md5,f4b1a7cef760291172144a8614b4a1cd", + "sample3.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample3.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample3.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample3.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample3.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample4.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample4.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample4.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample4.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + 
"sample4.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "multi_intervals.antitarget.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "multi_intervals.target.bed:md5,f7474a0afbf5565c5675916606f2d9bd", + "reference.cnn:md5,891454915f82d0eeda0be13d71e0d5d7", + "test.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test.paired_end.recalibrated.sorted.bintest.cns:md5,8d0301673b584a6a8d88bdaad87ba09d", + "test.paired_end.recalibrated.sorted.call.cns:md5,9254686f5d1a2718cc2a20f28d0ef3d7", + "test.paired_end.recalibrated.sorted.cnr:md5,da3986c5e7a9c655ce7300062cfc00fa", + "test.paired_end.recalibrated.sorted.cns:md5,e25853dfbbb65f5294a45a2530cded76", + "test.paired_end.recalibrated.sorted.genemetrics.tsv:md5,5ec3555520f502f00f551ae7900a3824", + "test.paired_end.recalibrated.sorted.germline.call.cns:md5,5bba48aecc2282971b9d4c562bec4591", + "test.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,4153756b30abc934f10717c023bd262f", + "multi_intervals.antitarget.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "multi_intervals.target.bed:md5,f9e727147a7cbca46c2b9b2de3ecae91", + "reference.cnn:md5,e0aefc1c8a40e6919dc9cfb686e51f7c", + "sample3.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "sample3.targetcoverage.cnn:md5,826087b08d380135a271e2774c94084c", + "sample4.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "sample4.bintest.cns:md5,b5ccf48e99562082026e94f8d8aa1133", + "sample4.call.cns:md5,8202ec610e834c58bb7199761fccd9e5", + "sample4.cnr:md5,47d799d37af7ca8022fcac0ec705a3b8", + "sample4.cns:md5,50352818fd705424a7ab770d7707c312", + "sample4.genemetrics.tsv:md5,5ec3555520f502f00f551ae7900a3824", + "sample4.somatic.call.cns:md5,1cb4febd5c88d306277f3c4c3117d68c", + "sample4.targetcoverage.cnn:md5,62c2d1f8765618c454bf4455c2298344" + ], + [ + "sample3.bam:md5,891da60dc1cc5c6455bfc1442aea0982", + "sample4.bam:md5,36856b3e6dd1bc0f10688b9d8c02faa8" + ], + "No CRAM files", + [ + 
"sample3.cnvcall.vcf:md5,6f8c517f62715c47674c79ffd1e9babb", + "sample4_vs_sample3.cnvcall.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T22:16:22.938969536" + }, + "-profile test --tools cnvkit --input recalibrated.csv --only_paired_variant_calling": { + "content": [ + 30, + { + "CNVKIT_ANTITARGET": { + "cnvkit": "0.9.11" + }, + "CNVKIT_BATCH": { + "cnvkit": "0.9.10", + "samtools": 1.17 + }, + "CNVKIT_CALL": { + "cnvkit": "0.9.10" + }, + "CNVKIT_EXPORT": { + "cnvkit": "0.9.10" + }, + "CNVKIT_GENEMETRICS": { + "cnvkit": "0.9.10" + }, + "CNVKIT_REFERENCE": { + "cnvkit": "0.9.11" + }, + "CRAM_TO_BAM": { + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + 
"multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/converted", + "preprocessing/converted/cram_to_bam", + "preprocessing/converted/cram_to_bam/sample1", + "preprocessing/converted/cram_to_bam/sample1/sample1.bam", + "preprocessing/converted/cram_to_bam/sample1/sample1.bam.bai", + "preprocessing/converted/cram_to_bam/sample2", + "preprocessing/converted/cram_to_bam/sample2/sample2.bam", + "preprocessing/converted/cram_to_bam/sample2/sample2.bam.bai", + "preprocessing/converted/cram_to_bam/sample3", + "preprocessing/converted/cram_to_bam/sample3/sample3.bam", + "preprocessing/converted/cram_to_bam/sample3/sample3.bam.bai", + 
"preprocessing/converted/cram_to_bam/sample4", + "preprocessing/converted/cram_to_bam/sample4/sample4.bam", + "preprocessing/converted/cram_to_bam/sample4/sample4.bam.bai", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/sample1", + "reports/mosdepth/sample1/sample1.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.summary.txt", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz.csi", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample1", + "reports/samtools/sample1/sample1.recal.cram.stats", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + 
"reports/samtools/sample4/sample4.recal.cram.stats", + "variant_calling", + "variant_calling/cnvkit", + "variant_calling/cnvkit/sample1", + "variant_calling/cnvkit/sample1/multi_intervals.antitarget.bed", + "variant_calling/cnvkit/sample1/multi_intervals.target.bed", + "variant_calling/cnvkit/sample1/reference.cnn", + "variant_calling/cnvkit/sample1/sample1.cnvcall.vcf", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted-diagram.pdf", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted-scatter.png", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.antitargetcoverage.cnn", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.bintest.cns", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.call.cns", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.cnr", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.cns", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.genemetrics.tsv", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.germline.call.cns", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.targetcoverage.cnn", + "variant_calling/cnvkit/sample2", + "variant_calling/cnvkit/sample2/cnvkit.reference.antitarget-tmp.bed", + "variant_calling/cnvkit/sample2/cnvkit.reference.target-tmp.bed", + "variant_calling/cnvkit/sample2/sample2-diagram.pdf", + "variant_calling/cnvkit/sample2/sample2-scatter.png", + "variant_calling/cnvkit/sample2/sample2.antitargetcoverage.cnn", + "variant_calling/cnvkit/sample2/sample2.bintest.cns", + "variant_calling/cnvkit/sample2/sample2.call.cns", + "variant_calling/cnvkit/sample2/sample2.cnr", + "variant_calling/cnvkit/sample2/sample2.cns", + "variant_calling/cnvkit/sample2/sample2.cnvcall.vcf", + "variant_calling/cnvkit/sample2/sample2.genemetrics.tsv", + "variant_calling/cnvkit/sample2/sample2.targetcoverage.cnn", + 
"variant_calling/cnvkit/sample2/sample2.tumor_only.call.cns", + "variant_calling/cnvkit/sample4_vs_sample3", + "variant_calling/cnvkit/sample4_vs_sample3/multi_intervals.antitarget.bed", + "variant_calling/cnvkit/sample4_vs_sample3/multi_intervals.target.bed", + "variant_calling/cnvkit/sample4_vs_sample3/reference.cnn", + "variant_calling/cnvkit/sample4_vs_sample3/sample3.antitargetcoverage.cnn", + "variant_calling/cnvkit/sample4_vs_sample3/sample3.targetcoverage.cnn", + "variant_calling/cnvkit/sample4_vs_sample3/sample4-diagram.pdf", + "variant_calling/cnvkit/sample4_vs_sample3/sample4-scatter.png", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.antitargetcoverage.cnn", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.bintest.cns", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.call.cns", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.cnr", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.cns", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.genemetrics.tsv", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.somatic.call.cns", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.targetcoverage.cnn", + "variant_calling/cnvkit/sample4_vs_sample3/sample4_vs_sample3.cnvcall.vcf" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,73ef9a077df1887f9021a581fbf207bc", + "mosdepth-cumcoverage-dist-id.txt:md5,ad0637d55d7025330f2f6cb7f9680e64", + "mosdepth_perchrom.txt:md5,73ef9a077df1887f9021a581fbf207bc", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "samtools-stats-dp.txt:md5,b446a47b182d93a9e7e74f5f7c8d41c2", + "samtools_alignment_plot.txt:md5,7138a2d29f515993e1df8d745e27b757", + "sample1.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample1.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample1.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample1.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + 
"sample1.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample2.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample2.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample2.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample2.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample2.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample3.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample3.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample3.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample3.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample3.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample4.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample4.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample4.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample4.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample4.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "multi_intervals.antitarget.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "multi_intervals.target.bed:md5,f7474a0afbf5565c5675916606f2d9bd", + "reference.cnn:md5,891454915f82d0eeda0be13d71e0d5d7", + "test.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test.paired_end.recalibrated.sorted.bintest.cns:md5,8d0301673b584a6a8d88bdaad87ba09d", + "test.paired_end.recalibrated.sorted.call.cns:md5,9254686f5d1a2718cc2a20f28d0ef3d7", + "test.paired_end.recalibrated.sorted.cnr:md5,da3986c5e7a9c655ce7300062cfc00fa", + "test.paired_end.recalibrated.sorted.cns:md5,e25853dfbbb65f5294a45a2530cded76", + "test.paired_end.recalibrated.sorted.genemetrics.tsv:md5,5ec3555520f502f00f551ae7900a3824", + 
"test.paired_end.recalibrated.sorted.germline.call.cns:md5,5bba48aecc2282971b9d4c562bec4591", + "test.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,4153756b30abc934f10717c023bd262f", + "cnvkit.reference.antitarget-tmp.bed:md5,3d4d20f9f23b39970865d29ef239d20b", + "cnvkit.reference.target-tmp.bed:md5,657b25dbda8516624efa8cb2cf3716ca", + "sample2.antitargetcoverage.cnn:md5,067115082c4af4b64d58c0dc3a3642e4", + "sample2.bintest.cns:md5,7a66b5f63acb05e6dfb0784c215851ec", + "sample2.call.cns:md5,f7caeca04aba28b125ce26b511f42afb", + "sample2.cnr:md5,d9bdb71ce807051369577ee7f807a40c", + "sample2.cns:md5,2b56aac606ba6183d018b30ca58afcec", + "sample2.genemetrics.tsv:md5,5ec3555520f502f00f551ae7900a3824", + "sample2.targetcoverage.cnn:md5,e6d0190c1c37ce6e41f76ca5b24ccca3", + "sample2.tumor_only.call.cns:md5,70a308c6db7acf1a5fd623936cac6412", + "multi_intervals.antitarget.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "multi_intervals.target.bed:md5,f9e727147a7cbca46c2b9b2de3ecae91", + "reference.cnn:md5,e0aefc1c8a40e6919dc9cfb686e51f7c", + "sample3.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "sample3.targetcoverage.cnn:md5,826087b08d380135a271e2774c94084c", + "sample4.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "sample4.bintest.cns:md5,b5ccf48e99562082026e94f8d8aa1133", + "sample4.call.cns:md5,8202ec610e834c58bb7199761fccd9e5", + "sample4.cnr:md5,47d799d37af7ca8022fcac0ec705a3b8", + "sample4.cns:md5,50352818fd705424a7ab770d7707c312", + "sample4.genemetrics.tsv:md5,5ec3555520f502f00f551ae7900a3824", + "sample4.somatic.call.cns:md5,1cb4febd5c88d306277f3c4c3117d68c", + "sample4.targetcoverage.cnn:md5,62c2d1f8765618c454bf4455c2298344" + ], + [ + "sample1.bam:md5,891da60dc1cc5c6455bfc1442aea0982", + "sample2.bam:md5,36856b3e6dd1bc0f10688b9d8c02faa8", + "sample3.bam:md5,891da60dc1cc5c6455bfc1442aea0982", + "sample4.bam:md5,36856b3e6dd1bc0f10688b9d8c02faa8" + ], + "No CRAM files", + [ + 
"sample1.cnvcall.vcf:md5,6f8c517f62715c47674c79ffd1e9babb", + "sample2.cnvcall.vcf:md5,35662b51f071469c6227be15114eb824", + "sample4_vs_sample3.cnvcall.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T22:19:35.224650709" + }, + "-profile test --tools cnvkit --input recalibrated_germline.csv": { + "content": [ + 13, + { + "CNVKIT_ANTITARGET": { + "cnvkit": "0.9.11" + }, + "CNVKIT_BATCH": { + "cnvkit": "0.9.10", + "samtools": 1.17 + }, + "CNVKIT_CALL": { + "cnvkit": "0.9.10" + }, + "CNVKIT_EXPORT": { + "cnvkit": "0.9.10" + }, + "CNVKIT_GENEMETRICS": { + "cnvkit": "0.9.10" + }, + "CNVKIT_REFERENCE": { + "cnvkit": "0.9.11" + }, + "CRAM_TO_BAM": { + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + 
"multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/converted", + "preprocessing/converted/cram_to_bam", + "preprocessing/converted/cram_to_bam/sample1", + "preprocessing/converted/cram_to_bam/sample1/sample1.bam", + "preprocessing/converted/cram_to_bam/sample1/sample1.bam.bai", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/sample1", + "reports/mosdepth/sample1/sample1.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.summary.txt", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz", + 
"reports/mosdepth/sample1/sample1.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample1", + "reports/samtools/sample1/sample1.recal.cram.stats", + "variant_calling", + "variant_calling/cnvkit", + "variant_calling/cnvkit/sample1", + "variant_calling/cnvkit/sample1/multi_intervals.antitarget.bed", + "variant_calling/cnvkit/sample1/multi_intervals.target.bed", + "variant_calling/cnvkit/sample1/reference.cnn", + "variant_calling/cnvkit/sample1/sample1.cnvcall.vcf", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted-diagram.pdf", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted-scatter.png", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.antitargetcoverage.cnn", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.bintest.cns", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.call.cns", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.cnr", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.cns", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.genemetrics.tsv", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.germline.call.cns", + "variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.targetcoverage.cnn" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,49fd828d21d79a5c9430cdb30d7f0126", + "mosdepth-cumcoverage-dist-id.txt:md5,edd8dfc8023e629e476e01c3a1448d30", + "mosdepth_perchrom.txt:md5,49fd828d21d79a5c9430cdb30d7f0126", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "samtools-stats-dp.txt:md5,41d36c1a8413d565788a1a364b467606", + "samtools_alignment_plot.txt:md5,35f8fd2a557568b2237193f46afbab5c", + "sample1.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample1.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + 
"sample1.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample1.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample1.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "multi_intervals.antitarget.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "multi_intervals.target.bed:md5,f7474a0afbf5565c5675916606f2d9bd", + "reference.cnn:md5,891454915f82d0eeda0be13d71e0d5d7", + "test.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test.paired_end.recalibrated.sorted.bintest.cns:md5,8d0301673b584a6a8d88bdaad87ba09d", + "test.paired_end.recalibrated.sorted.call.cns:md5,9254686f5d1a2718cc2a20f28d0ef3d7", + "test.paired_end.recalibrated.sorted.cnr:md5,da3986c5e7a9c655ce7300062cfc00fa", + "test.paired_end.recalibrated.sorted.cns:md5,e25853dfbbb65f5294a45a2530cded76", + "test.paired_end.recalibrated.sorted.genemetrics.tsv:md5,5ec3555520f502f00f551ae7900a3824", + "test.paired_end.recalibrated.sorted.germline.call.cns:md5,5bba48aecc2282971b9d4c562bec4591", + "test.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,4153756b30abc934f10717c023bd262f" + ], + [ + "sample1.bam:md5,891da60dc1cc5c6455bfc1442aea0982" + ], + "No CRAM files", + [ + "sample1.cnvcall.vcf:md5,6f8c517f62715c47674c79ffd1e9babb" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T22:14:57.81598857" + }, + "-profile test --tools cnvkit --input recalibrated_tumoronly.csv": { + "content": [ + 13, + { + "CNVKIT_ANTITARGET": { + "cnvkit": "0.9.11" + }, + "CNVKIT_BATCH": { + "cnvkit": "0.9.10" + }, + "CNVKIT_CALL": { + "cnvkit": "0.9.10" + }, + "CNVKIT_EXPORT": { + "cnvkit": "0.9.10" + }, + "CNVKIT_GENEMETRICS": { + "cnvkit": "0.9.10" + }, + "CNVKIT_REFERENCE": { + "cnvkit": "0.9.11" + }, + "CRAM_TO_BAM": { + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 
1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + 
"multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/converted", + "preprocessing/converted/cram_to_bam", + "preprocessing/converted/cram_to_bam/sample2", + "preprocessing/converted/cram_to_bam/sample2/sample2.bam", + "preprocessing/converted/cram_to_bam/sample2/sample2.bam.bai", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "variant_calling", + "variant_calling/cnvkit", + "variant_calling/cnvkit/sample2", + "variant_calling/cnvkit/sample2/cnvkit.reference.antitarget-tmp.bed", + "variant_calling/cnvkit/sample2/cnvkit.reference.target-tmp.bed", + "variant_calling/cnvkit/sample2/sample2-diagram.pdf", + "variant_calling/cnvkit/sample2/sample2-scatter.png", + "variant_calling/cnvkit/sample2/sample2.antitargetcoverage.cnn", + "variant_calling/cnvkit/sample2/sample2.bintest.cns", + "variant_calling/cnvkit/sample2/sample2.call.cns", + "variant_calling/cnvkit/sample2/sample2.cnr", + "variant_calling/cnvkit/sample2/sample2.cns", + "variant_calling/cnvkit/sample2/sample2.cnvcall.vcf", + 
"variant_calling/cnvkit/sample2/sample2.genemetrics.tsv", + "variant_calling/cnvkit/sample2/sample2.targetcoverage.cnn", + "variant_calling/cnvkit/sample2/sample2.tumor_only.call.cns" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,ff38f1e35ce12244c751921db673b23c", + "mosdepth-cumcoverage-dist-id.txt:md5,8398d1127a10d0c002831ddedfb9713b", + "mosdepth_perchrom.txt:md5,ff38f1e35ce12244c751921db673b23c", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", + "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", + "sample2.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample2.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample2.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample2.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample2.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "cnvkit.reference.antitarget-tmp.bed:md5,3d4d20f9f23b39970865d29ef239d20b", + "cnvkit.reference.target-tmp.bed:md5,657b25dbda8516624efa8cb2cf3716ca", + "sample2.antitargetcoverage.cnn:md5,067115082c4af4b64d58c0dc3a3642e4", + "sample2.bintest.cns:md5,7a66b5f63acb05e6dfb0784c215851ec", + "sample2.call.cns:md5,f7caeca04aba28b125ce26b511f42afb", + "sample2.cnr:md5,d9bdb71ce807051369577ee7f807a40c", + "sample2.cns:md5,2b56aac606ba6183d018b30ca58afcec", + "sample2.genemetrics.tsv:md5,5ec3555520f502f00f551ae7900a3824", + "sample2.targetcoverage.cnn:md5,e6d0190c1c37ce6e41f76ca5b24ccca3", + "sample2.tumor_only.call.cns:md5,70a308c6db7acf1a5fd623936cac6412" + ], + [ + "sample2.bam:md5,36856b3e6dd1bc0f10688b9d8c02faa8" + ], + "No CRAM files", + [ + "sample2.cnvcall.vcf:md5,35662b51f071469c6227be15114eb824" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T22:17:53.424126221" + }, + "-profile test --tools cnvkit --input 
recalibrated_tumoronly.csv --no_intervals": { + "content": [ + 5, + { + "CRAM_TO_BAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + 
"multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/converted", + "preprocessing/converted/cram_to_bam", + "preprocessing/converted/cram_to_bam/sample2", + "preprocessing/converted/cram_to_bam/sample2/sample2.bam", + "preprocessing/converted/cram_to_bam/sample2/sample2.bam.bai", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,ff38f1e35ce12244c751921db673b23c", + "mosdepth-cumcoverage-dist-id.txt:md5,8398d1127a10d0c002831ddedfb9713b", + "mosdepth_perchrom.txt:md5,ff38f1e35ce12244c751921db673b23c", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", + "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", + "sample2.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + 
"sample2.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample2.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample2.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample2.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd" + ], + [ + "sample2.bam:md5,36856b3e6dd1bc0f10688b9d8c02faa8" + ], + "No CRAM files", + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T22:21:53.321675509" + }, + "-profile test --tools cnvkit --input recalibrated_somatic.csv --no_intervals": { + "content": [ + 16, + { + "CNVKIT_BATCH": { + "cnvkit": "0.9.10", + "samtools": 1.17 + }, + "CNVKIT_CALL": { + "cnvkit": "0.9.10" + }, + "CNVKIT_EXPORT": { + "cnvkit": "0.9.10" + }, + "CNVKIT_GENEMETRICS": { + "cnvkit": "0.9.10" + }, + "CRAM_TO_BAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + 
"multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/converted", + "preprocessing/converted/cram_to_bam", + "preprocessing/converted/cram_to_bam/sample3", + "preprocessing/converted/cram_to_bam/sample3/sample3.bam", + "preprocessing/converted/cram_to_bam/sample3/sample3.bam.bai", + "preprocessing/converted/cram_to_bam/sample4", + "preprocessing/converted/cram_to_bam/sample4/sample4.bam", + "preprocessing/converted/cram_to_bam/sample4/sample4.bam.bai", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/sample3", + 
"reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "variant_calling", + "variant_calling/cnvkit", + "variant_calling/cnvkit/sample3", + "variant_calling/cnvkit/sample3/genome.antitarget.bed", + "variant_calling/cnvkit/sample3/genome.bed", + "variant_calling/cnvkit/sample3/genome.target.bed", + "variant_calling/cnvkit/sample3/reference.cnn", + "variant_calling/cnvkit/sample3/sample3.cnvcall.vcf", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted-diagram.pdf", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted-scatter.png", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.antitargetcoverage.cnn", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.bintest.cns", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.call.cns", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.cnr", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.cns", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.genemetrics.tsv", + "variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.germline.call.cns", + 
"variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.targetcoverage.cnn", + "variant_calling/cnvkit/sample4_vs_sample3", + "variant_calling/cnvkit/sample4_vs_sample3/genome.antitarget.bed", + "variant_calling/cnvkit/sample4_vs_sample3/genome.bed", + "variant_calling/cnvkit/sample4_vs_sample3/genome.target.bed", + "variant_calling/cnvkit/sample4_vs_sample3/reference.cnn", + "variant_calling/cnvkit/sample4_vs_sample3/sample3.antitargetcoverage.cnn", + "variant_calling/cnvkit/sample4_vs_sample3/sample3.targetcoverage.cnn", + "variant_calling/cnvkit/sample4_vs_sample3/sample4-diagram.pdf", + "variant_calling/cnvkit/sample4_vs_sample3/sample4-scatter.png", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.antitargetcoverage.cnn", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.bintest.cns", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.call.cns", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.cnr", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.cns", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.genemetrics.tsv", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.somatic.call.cns", + "variant_calling/cnvkit/sample4_vs_sample3/sample4.targetcoverage.cnn", + "variant_calling/cnvkit/sample4_vs_sample3/sample4_vs_sample3.cnvcall.vcf" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,336d786b273c4d4e714d51c44207ff56", + "mosdepth-cumcoverage-dist-id.txt:md5,0cc87c596a0fc30ebb676c4587e986de", + "mosdepth_perchrom.txt:md5,336d786b273c4d4e714d51c44207ff56", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "samtools-stats-dp.txt:md5,e0a8d8867064083908a8ca1ea782d7ac", + "samtools_alignment_plot.txt:md5,f4b1a7cef760291172144a8614b4a1cd", + "sample3.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample3.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample3.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + 
"sample3.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample3.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample4.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample4.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample4.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample4.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample4.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "genome.antitarget.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.bed:md5,9c6cc178da8c2c27364be9f25c9df96d", + "genome.target.bed:md5,d6bb0e93de375af227a800a032f08f03", + "reference.cnn:md5,3fa091a8b523751440fcd28dbf8a07ad", + "test.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test.paired_end.recalibrated.sorted.bintest.cns:md5,0eee70a9c66e9fe98deb5ca66b7bced5", + "test.paired_end.recalibrated.sorted.call.cns:md5,2613ce28533ce6b6a81a9225cc3ad498", + "test.paired_end.recalibrated.sorted.cnr:md5,e22929e0f8a30753574952190262878c", + "test.paired_end.recalibrated.sorted.cns:md5,5e89aee43d651ab25f917355c9da259f", + "test.paired_end.recalibrated.sorted.genemetrics.tsv:md5,5ec3555520f502f00f551ae7900a3824", + "test.paired_end.recalibrated.sorted.germline.call.cns:md5,9c4487e30c8f435d62da9aa978d9e792", + "test.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,cb12dff55cceeec41a4c14787d4cf62a", + "genome.antitarget.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.bed:md5,9c6cc178da8c2c27364be9f25c9df96d", + "genome.target.bed:md5,ee8081becc36524d35889e3b5f70961b", + "reference.cnn:md5,8c53491ff76a2a06b7a977714db862bb", + "sample3.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "sample3.targetcoverage.cnn:md5,0067cc3a0e479b23ab3bf056cead31b4", + "sample4.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + 
"sample4.bintest.cns:md5,b6701cceb525c3087f4dad432d20100c", + "sample4.call.cns:md5,2e0cc8813e274b25175b4346f4698fbb", + "sample4.cnr:md5,9dba016feb45f566a92e0aac184472bb", + "sample4.cns:md5,b3dfd6adf2ac97009dc460453d42659b", + "sample4.genemetrics.tsv:md5,5ec3555520f502f00f551ae7900a3824", + "sample4.somatic.call.cns:md5,5e4d472294661d2010e13deefe5ae4f5", + "sample4.targetcoverage.cnn:md5,1e1012812eb893afd931485cb760294e" + ], + [ + "sample3.bam:md5,891da60dc1cc5c6455bfc1442aea0982", + "sample4.bam:md5,36856b3e6dd1bc0f10688b9d8c02faa8" + ], + "No CRAM files", + [ + "sample3.cnvcall.vcf:md5,172e25057fc6cab0a0b325c55bfacc2a", + "sample4_vs_sample3.cnvcall.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T22:20:58.031095995" + } +} diff --git a/tests/variant_calling_controlfreec.nf.test b/tests/variant_calling_controlfreec.nf.test new file mode 100644 index 0000000000..0a1d7182db --- /dev/null +++ b/tests/variant_calling_controlfreec.nf.test @@ -0,0 +1,77 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools controlfreec somatic", + params: [ + chr_dir: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz', + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: 
"${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + step: "variant_calling", + tools: 'controlfreec', + wes: true + ] + ], + [ + name: "-profile test --tools controlfreec --no_intervals tumoronly", + params: [ + chr_dir: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz', + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: "${projectDir}/tests/csv/3.0/recalibrated_tumoronly.csv", + no_intervals: true, + step: "variant_calling", + tools: 'controlfreec', + wes: true + ] + ], + [ + name: "-profile test --tools controlfreec --no_intervals somatic -stub", + params: [ + chr_dir: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz', + dbsnp : modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi : modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta : modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai : modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input : "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + no_intervals : true, + step : "variant_calling", + tools : 'controlfreec', + wes : true + ], + stub: true + ], + [ + name: "-profile test --tools controlfreec tumoronly -stub", + params: [ + chr_dir: modules_testdata_base_path + 
'genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz', + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: "${projectDir}/tests/csv/3.0/recalibrated_tumoronly.csv", + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + step: "variant_calling", + tools: 'controlfreec', + wes: true + ], + stub: true + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_controlfreec.nf.test.snap b/tests/variant_calling_controlfreec.nf.test.snap new file mode 100644 index 0000000000..91b2a395cd --- /dev/null +++ b/tests/variant_calling_controlfreec.nf.test.snap @@ -0,0 +1,528 @@ +{ + "-profile test --tools controlfreec --no_intervals tumoronly": { + "content": [ + 11, + { + "ASSESS_SIGNIFICANCE": { + "controlfreec": 11.6 + }, + "FREEC2BED": { + "controlfreec": "11.6b" + }, + "FREEC2CIRCOS": { + "controlfreec": "11.6b" + }, + "FREEC_TUMORONLY": { + "controlfreec": "11.6b" + }, + "MAKEGRAPH2": { + "controlfreec": "11.6b" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_MPILEUP": { + "samtools": 1.21 + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "UNTAR_CHR_DIR": { + "untar": 1.34 + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", 
+ "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + 
"pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "variant_calling", + "variant_calling/controlfreec", + "variant_calling/controlfreec/sample2", + "variant_calling/controlfreec/sample2/config.txt", + "variant_calling/controlfreec/sample2/sample2.bed", + "variant_calling/controlfreec/sample2/sample2.circos.txt", + "variant_calling/controlfreec/sample2/sample2.p.value.txt", + "variant_calling/controlfreec/sample2/sample2.tumor.mpileup.gz_BAF.txt", + "variant_calling/controlfreec/sample2/sample2.tumor.mpileup.gz_CNVs", + "variant_calling/controlfreec/sample2/sample2.tumor.mpileup.gz_info.txt", + "variant_calling/controlfreec/sample2/sample2.tumor.mpileup.gz_ratio.BedGraph", + "variant_calling/controlfreec/sample2/sample2.tumor.mpileup.gz_ratio.txt", + "variant_calling/controlfreec/sample2/sample2.tumor.mpileup.gz_sample.cpn", + "variant_calling/controlfreec/sample2/sample2_BAF.png", + "variant_calling/controlfreec/sample2/sample2_ratio.log2.png", + "variant_calling/controlfreec/sample2/sample2_ratio.png" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,a0266cd06dd6f7f5463f09e3cb8af71d", + "mosdepth-cumcoverage-dist-id.txt:md5,88b94dd2dcc423983da65125ece7651e", + "mosdepth_perchrom.txt:md5,a0266cd06dd6f7f5463f09e3cb8af71d", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", + "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", + "sample2.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", 
+ "sample2.recal.mosdepth.summary.txt:md5,0a7300e56eda6fba7c7564f00aa000f0", + "sample2.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample2.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample2.bed:md5,5249f46e614b60867bdd6b9b83327979", + "sample2.circos.txt:md5,2efab24d023931cec8b158c56d1f1765", + "sample2.p.value.txt:md5,38c8c9ad33a4fca3804a34d5c436cd1e", + "sample2.tumor.mpileup.gz_BAF.txt:md5,0bb91da6a637ed64d7622eb7d539fd71", + "sample2.tumor.mpileup.gz_CNVs:md5,741831784091e9a51e0c07117b67e18f", + "sample2.tumor.mpileup.gz_info.txt:md5,fed6aa0e0f4232255d5152f5774161b9", + "sample2.tumor.mpileup.gz_ratio.BedGraph:md5,d2347daecbb4eb1f1a3b5558acdf657a", + "sample2.tumor.mpileup.gz_ratio.txt:md5,7587b17b4303715aa45eae017e357c23", + "sample2.tumor.mpileup.gz_sample.cpn:md5,8bf25e5cf94e89bcbbd4bb0d453d3057" + ], + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T22:28:08.497498851" + }, + "-profile test --tools controlfreec somatic": { + "content": [ + 16, + { + "ASSESS_SIGNIFICANCE": { + "controlfreec": 11.6 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FREEC2BED": { + "controlfreec": "11.6b" + }, + "FREEC2CIRCOS": { + "controlfreec": "11.6b" + }, + "FREEC_SOMATIC": { + "controlfreec": "11.6b" + }, + "MAKEGRAPH2": { + "controlfreec": "11.6b" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_MPILEUP": { + "samtools": 1.21 + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "UNTAR_CHR_DIR": { + "untar": 1.34 + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + 
"multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + 
"multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "variant_calling", + "variant_calling/controlfreec", + "variant_calling/controlfreec/sample4_vs_sample3", + "variant_calling/controlfreec/sample4_vs_sample3/config.txt", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.bed", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.circos.txt", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.normal.mpileup.gz_control.cpn", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.p.value.txt", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_BAF.txt", + 
"variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_CNVs", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_info.txt", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_ratio.BedGraph", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_ratio.txt", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_sample.cpn", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_BAF.png", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.log2.png", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.png" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,4ee05f71086179b42a01cd2fb450346f", + "mosdepth-cumcoverage-dist-id.txt:md5,cb7468f51b8be1230fb3ac5b130be31f", + "mosdepth_perchrom.txt:md5,4ee05f71086179b42a01cd2fb450346f", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "samtools-stats-dp.txt:md5,e0a8d8867064083908a8ca1ea782d7ac", + "samtools_alignment_plot.txt:md5,f4b1a7cef760291172144a8614b4a1cd", + "sample3.recal.mosdepth.global.dist.txt:md5,69e29702ef01fd8f6c7a5468fc35a16a", + "sample3.recal.mosdepth.region.dist.txt:md5,6ec49cd7d510c2eb3d9d90fdb79b783a", + "sample3.recal.mosdepth.summary.txt:md5,103098d0bf76ed82d2b87d5f242b099a", + "sample3.recal.per-base.bed.gz:md5,297f96648928d0ca5184223fb9941e7c", + "sample3.recal.per-base.bed.gz.csi:md5,c67dcd711b096eb42f43784d5eadbc0d", + "sample3.recal.regions.bed.gz:md5,314ce8d7273eff353072108aa77c327c", + "sample3.recal.regions.bed.gz.csi:md5,9cb0ad7039a3b703d16ca7d5b835c0ee", + "sample4.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample4.recal.mosdepth.region.dist.txt:md5,39005ffaac22871ffaaf19656fe69c5b", + "sample4.recal.mosdepth.summary.txt:md5,68d4b98f17361fddf73052ead34fa370", + 
"sample4.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample4.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample4.recal.regions.bed.gz:md5,b7561bc56a955f7db0f11e67e2ec0386", + "sample4.recal.regions.bed.gz.csi:md5,393c2749068304d8545b501b9d4658e4", + "sample4_vs_sample3.bed:md5,47f60179409e9389e59b2e2525e42210", + "sample4_vs_sample3.circos.txt:md5,68addb1d8bda08355842bef0ab15cd6e", + "sample4_vs_sample3.normal.mpileup.gz_control.cpn:md5,d50bf2c9a4d35f022364901c284e80ed", + "sample4_vs_sample3.p.value.txt:md5,3fad51341e7ee56c3b02de6a51d77efa", + "sample4_vs_sample3.tumor.mpileup.gz_BAF.txt:md5,723779bd103b66dcfa6fcfa692135a61", + "sample4_vs_sample3.tumor.mpileup.gz_CNVs:md5,1d9166f66bf72adf2aea74adfc4ab015", + "sample4_vs_sample3.tumor.mpileup.gz_info.txt:md5,b79da6ae026d86777d60d9f9edb9c6f6", + "sample4_vs_sample3.tumor.mpileup.gz_ratio.BedGraph:md5,cb087117ea046a6350885a34cb4bf667", + "sample4_vs_sample3.tumor.mpileup.gz_ratio.txt:md5,690cbefd87a77a6a37689135585c401c", + "sample4_vs_sample3.tumor.mpileup.gz_sample.cpn:md5,b4f97163fdb6a3d97ca4ea560394cdb1" + ], + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T22:26:38.500794704" + }, + "-profile test --tools controlfreec --no_intervals somatic -stub": { + "content": [ + 14, + { + "ASSESS_SIGNIFICANCE": { + "controlfreec": 11.6 + }, + "FREEC2BED": { + "controlfreec": "11.6b" + }, + "FREEC2CIRCOS": { + "controlfreec": "11.6b" + }, + "FREEC_SOMATIC": { + "controlfreec": "11.6b" + }, + "MAKEGRAPH2": { + "controlfreec": "11.6b" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_MPILEUP": { + "samtools": 1.21 + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "UNTAR_CHR_DIR": { + "untar": 1.34 + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + 
"multiqc/multiqc_data/.stub", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample3/sample3.recal.per-base.d4", + "reports/mosdepth/sample3/sample3.recal.quantized.bed.gz", + "reports/mosdepth/sample3/sample3.recal.quantized.bed.gz.csi", + "reports/mosdepth/sample3/sample3.recal.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample3/sample3.recal.summary.txt", + "reports/mosdepth/sample3/sample3.recal.thresholds.bed.gz", + "reports/mosdepth/sample3/sample3.recal.thresholds.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample4/sample4.recal.per-base.d4", + "reports/mosdepth/sample4/sample4.recal.quantized.bed.gz", + "reports/mosdepth/sample4/sample4.recal.quantized.bed.gz.csi", + "reports/mosdepth/sample4/sample4.recal.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4/sample4.recal.summary.txt", + "reports/mosdepth/sample4/sample4.recal.thresholds.bed.gz", + "reports/mosdepth/sample4/sample4.recal.thresholds.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + 
"reports/samtools/sample4/sample4.recal.cram.stats", + "variant_calling", + "variant_calling/controlfreec", + "variant_calling/controlfreec/sample4_vs_sample3", + "variant_calling/controlfreec/sample4_vs_sample3/GC_profile.sample4_vs_sample3.cpn", + "variant_calling/controlfreec/sample4_vs_sample3/config.txt", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.bed", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.circos.txt", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.p.value.txt", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_BAF.png", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_BAF.txt", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_CNVs", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_info.txt", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.BedGraph", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.log2.png", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.png", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.txt", + "variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_sample.cpn" + ], + [ + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." 
+ ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-18T13:14:28.699871503" + }, + "-profile test --tools controlfreec tumoronly -stub": { + "content": [ + 13, + { + "ASSESS_SIGNIFICANCE": { + "controlfreec": 11.6 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FREEC2BED": { + "controlfreec": "11.6b" + }, + "FREEC2CIRCOS": { + "controlfreec": "11.6b" + }, + "FREEC_TUMORONLY": { + "controlfreec": "11.6b" + }, + "MAKEGRAPH2": { + "controlfreec": "11.6b" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_MPILEUP": { + "samtools": 1.21 + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "UNTAR_CHR_DIR": { + "untar": 1.34 + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/.stub", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/mosdepth", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample2/sample2.recal.per-base.d4", + "reports/mosdepth/sample2/sample2.recal.quantized.bed.gz", + "reports/mosdepth/sample2/sample2.recal.quantized.bed.gz.csi", + "reports/mosdepth/sample2/sample2.recal.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/mosdepth/sample2/sample2.recal.summary.txt", + "reports/mosdepth/sample2/sample2.recal.thresholds.bed.gz", + "reports/mosdepth/sample2/sample2.recal.thresholds.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + 
"reports/samtools/sample2/sample2.recal.cram.stats", + "variant_calling", + "variant_calling/controlfreec", + "variant_calling/controlfreec/sample2", + "variant_calling/controlfreec/sample2/GC_profile.sample2.cpn", + "variant_calling/controlfreec/sample2/config.txt", + "variant_calling/controlfreec/sample2/sample2.bed", + "variant_calling/controlfreec/sample2/sample2.circos.txt", + "variant_calling/controlfreec/sample2/sample2.p.value.txt", + "variant_calling/controlfreec/sample2/sample2_BAF.png", + "variant_calling/controlfreec/sample2/sample2_BAF.txt", + "variant_calling/controlfreec/sample2/sample2_CNVs", + "variant_calling/controlfreec/sample2/sample2_info.txt", + "variant_calling/controlfreec/sample2/sample2_ratio.BedGraph", + "variant_calling/controlfreec/sample2/sample2_ratio.log2.png", + "variant_calling/controlfreec/sample2/sample2_ratio.png", + "variant_calling/controlfreec/sample2/sample2_ratio.txt", + "variant_calling/controlfreec/sample2/sample2_sample.cpn" + ], + [ + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." 
+ ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-18T13:15:24.776142198" + } +} diff --git a/tests/variant_calling_deepvariant.nf.test b/tests/variant_calling_deepvariant.nf.test new file mode 100644 index 0000000000..5a0e8cf6ce --- /dev/null +++ b/tests/variant_calling_deepvariant.nf.test @@ -0,0 +1,54 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools deepvariant --input tests/csv/3.0/mapped_single_cram.csv", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_cram.csv", + step: "variant_calling", + tools: 'deepvariant' + ], + no_conda: true + ], + [ + name: "-profile test --tools deepvariant --input tests/csv/3.0/mapped_single_cram.csv -stub", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_cram.csv", + step: "variant_calling", + tools: 'deepvariant' + ], + no_conda: true, + stub: true + ], + [ + name: "-profile test --tools deepvariant --input tests/csv/3.0/mapped_single_cram.csv --no_intervals", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_cram.csv", + step: "variant_calling", + tools: 'deepvariant', + no_intervals: true + ], + no_conda: true, + ], + [ + name: "-profile test --tools deepvariant --input tests/csv/3.0/mapped_single_cram.csv --no_intervals -stub", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_cram.csv", + step: "variant_calling", + tools: 'deepvariant', + no_intervals: true + ], + stub: true, + no_conda: true + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_deepvariant.nf.test.snap b/tests/variant_calling_deepvariant.nf.test.snap new file mode 100644 index 0000000000..b337dac57b --- /dev/null +++ b/tests/variant_calling_deepvariant.nf.test.snap @@ -0,0 +1,646 @@ +{ + "-profile test --tools deepvariant --input 
tests/csv/3.0/mapped_single_cram.csv": { + "content": [ + 12, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "DEEPVARIANT_RUNDEEPVARIANT": { + "deepvariant": "1.9.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + 
"multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + 
"multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/deepvariant", + "reports/bcftools/deepvariant/test", + "reports/bcftools/deepvariant/test/test.deepvariant.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + 
"reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/deepvariant", + "reports/vcftools/deepvariant/test", + "reports/vcftools/deepvariant/test/test.deepvariant.FILTER.summary", + "reports/vcftools/deepvariant/test/test.deepvariant.TsTv.count", + "reports/vcftools/deepvariant/test/test.deepvariant.TsTv.qual", + "variant_calling", + "variant_calling/deepvariant", + "variant_calling/deepvariant/test", + "variant_calling/deepvariant/test/test.deepvariant.g.vcf.gz", + "variant_calling/deepvariant/test/test.deepvariant.g.vcf.gz.tbi", + "variant_calling/deepvariant/test/test.deepvariant.vcf.gz", + "variant_calling/deepvariant/test/test.deepvariant.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,115b9a93bcf511419e6622919172f321", + "mosdepth-cumcoverage-dist-id.txt:md5,df2fc825dfc41e30f72f5d125a6447ee", + "mosdepth_perchrom.txt:md5,115b9a93bcf511419e6622919172f321", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,06abce459a3e3192252b6196ae3b189e", + "samtools_alignment_plot.txt:md5,3622ecb71829cc1c581157e403048fd1", + "test.deepvariant.bcftools_stats.txt:md5,a19ff85c0bc9796ace876f3a9c34c6ce", + "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "test.deepvariant.FILTER.summary:md5,d0fe196a8062d7d67d9018cb40f5490c", + "test.deepvariant.TsTv.count:md5,2851ce55f9813280515c2fdcacb86844" + ], + "No BAM files", + "No CRAM files", + [ + "test.deepvariant.g.vcf.gz:md5,706cb23dc04a7e1055f3e5739666615", + "test.deepvariant.vcf.gz:md5,dc50f0a5b27f3ade02c745e69e69ad29" + ], + "No warnings" + ], + "timestamp": "2025-10-02T10:43:58.630637808", + 
"meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + } + }, + "-profile test --tools deepvariant --input tests/csv/3.0/mapped_single_cram.csv -stub": { + "content": [ + 12, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "DEEPVARIANT_RUNDEEPVARIANT": { + "deepvariant": "1.9.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/.stub", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/deepvariant", + "reports/bcftools/deepvariant/test", + "reports/bcftools/deepvariant/test/test.deepvariant.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.global.dist.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + "reports/mosdepth/test/test.recal.per-base.d4", + "reports/mosdepth/test/test.recal.quantized.bed.gz", + "reports/mosdepth/test/test.recal.quantized.bed.gz.csi", + "reports/mosdepth/test/test.recal.region.dist.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.summary.txt", + "reports/mosdepth/test/test.recal.thresholds.bed.gz", + "reports/mosdepth/test/test.recal.thresholds.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + 
"reports/vcftools/deepvariant", + "reports/vcftools/deepvariant/test", + "reports/vcftools/deepvariant/test/test.deepvariant.012", + "reports/vcftools/deepvariant/test/test.deepvariant.012.indv", + "reports/vcftools/deepvariant/test/test.deepvariant.012.pos", + "reports/vcftools/deepvariant/test/test.deepvariant.BEAGLE.GL", + "reports/vcftools/deepvariant/test/test.deepvariant.BEAGLE.PL", + "reports/vcftools/deepvariant/test/test.deepvariant.FILTER.summary", + "reports/vcftools/deepvariant/test/test.deepvariant.FORMAT", + "reports/vcftools/deepvariant/test/test.deepvariant.INFO", + "reports/vcftools/deepvariant/test/test.deepvariant.LROH", + "reports/vcftools/deepvariant/test/test.deepvariant.Tajima.D", + "reports/vcftools/deepvariant/test/test.deepvariant.TsTv", + "reports/vcftools/deepvariant/test/test.deepvariant.TsTv.count", + "reports/vcftools/deepvariant/test/test.deepvariant.TsTv.qual", + "reports/vcftools/deepvariant/test/test.deepvariant.TsTv.summary", + "reports/vcftools/deepvariant/test/test.deepvariant.bcf", + "reports/vcftools/deepvariant/test/test.deepvariant.diff.discordance.matrix", + "reports/vcftools/deepvariant/test/test.deepvariant.diff.indv", + "reports/vcftools/deepvariant/test/test.deepvariant.diff.indv_in_files", + "reports/vcftools/deepvariant/test/test.deepvariant.diff.sites", + "reports/vcftools/deepvariant/test/test.deepvariant.diff.sites_in_files", + "reports/vcftools/deepvariant/test/test.deepvariant.diff.switch", + "reports/vcftools/deepvariant/test/test.deepvariant.frq", + "reports/vcftools/deepvariant/test/test.deepvariant.frq.count", + "reports/vcftools/deepvariant/test/test.deepvariant.gdepth", + "reports/vcftools/deepvariant/test/test.deepvariant.geno.chisq", + "reports/vcftools/deepvariant/test/test.deepvariant.geno.ld", + "reports/vcftools/deepvariant/test/test.deepvariant.hap.ld", + "reports/vcftools/deepvariant/test/test.deepvariant.hapcount", + "reports/vcftools/deepvariant/test/test.deepvariant.het", + 
"reports/vcftools/deepvariant/test/test.deepvariant.hwe", + "reports/vcftools/deepvariant/test/test.deepvariant.idepth", + "reports/vcftools/deepvariant/test/test.deepvariant.ifreqburden", + "reports/vcftools/deepvariant/test/test.deepvariant.imiss", + "reports/vcftools/deepvariant/test/test.deepvariant.impute.hap", + "reports/vcftools/deepvariant/test/test.deepvariant.impute.hap.indv", + "reports/vcftools/deepvariant/test/test.deepvariant.impute.hap.legend", + "reports/vcftools/deepvariant/test/test.deepvariant.indel.hist", + "reports/vcftools/deepvariant/test/test.deepvariant.interchrom.geno.ld", + "reports/vcftools/deepvariant/test/test.deepvariant.interchrom.hap.ld", + "reports/vcftools/deepvariant/test/test.deepvariant.kept.sites", + "reports/vcftools/deepvariant/test/test.deepvariant.ldepth", + "reports/vcftools/deepvariant/test/test.deepvariant.ldepth.mean", + "reports/vcftools/deepvariant/test/test.deepvariant.ldhat.locs", + "reports/vcftools/deepvariant/test/test.deepvariant.ldhat.sites", + "reports/vcftools/deepvariant/test/test.deepvariant.list.geno.ld", + "reports/vcftools/deepvariant/test/test.deepvariant.list.hap.ld", + "reports/vcftools/deepvariant/test/test.deepvariant.lmiss", + "reports/vcftools/deepvariant/test/test.deepvariant.lqual", + "reports/vcftools/deepvariant/test/test.deepvariant.map", + "reports/vcftools/deepvariant/test/test.deepvariant.mendel", + "reports/vcftools/deepvariant/test/test.deepvariant.ped", + "reports/vcftools/deepvariant/test/test.deepvariant.relatedness", + "reports/vcftools/deepvariant/test/test.deepvariant.relatedness2", + "reports/vcftools/deepvariant/test/test.deepvariant.removed.sites", + "reports/vcftools/deepvariant/test/test.deepvariant.singletons", + "reports/vcftools/deepvariant/test/test.deepvariant.sites.pi", + "reports/vcftools/deepvariant/test/test.deepvariant.snpden", + "reports/vcftools/deepvariant/test/test.deepvariant.tfam", + "reports/vcftools/deepvariant/test/test.deepvariant.tped", + 
"reports/vcftools/deepvariant/test/test.deepvariant.vcf", + "reports/vcftools/deepvariant/test/test.deepvariant.weir.fst", + "reports/vcftools/deepvariant/test/test.deepvariant.windowed.pi", + "variant_calling", + "variant_calling/deepvariant", + "variant_calling/deepvariant/test", + "variant_calling/deepvariant/test/test.deepvariant.g.vcf.gz", + "variant_calling/deepvariant/test/test.deepvariant.g.vcf.gz.tbi", + "variant_calling/deepvariant/test/test.deepvariant.vcf.gz", + "variant_calling/deepvariant/test/test.deepvariant.vcf.gz.tbi" + ], + [ + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." + ] + ], + "timestamp": "2026-04-08T12:19:24.034798786", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.2" + } + }, + "-profile test --tools deepvariant --input tests/csv/3.0/mapped_single_cram.csv --no_intervals -stub": { + "content": [ + 9, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "DEEPVARIANT_RUNDEEPVARIANT": { + "deepvariant": "1.9.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/.stub", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/deepvariant", + "reports/bcftools/deepvariant/test", + "reports/bcftools/deepvariant/test/test.deepvariant.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.global.dist.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + 
"reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + "reports/mosdepth/test/test.recal.per-base.d4", + "reports/mosdepth/test/test.recal.quantized.bed.gz", + "reports/mosdepth/test/test.recal.quantized.bed.gz.csi", + "reports/mosdepth/test/test.recal.region.dist.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.summary.txt", + "reports/mosdepth/test/test.recal.thresholds.bed.gz", + "reports/mosdepth/test/test.recal.thresholds.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/deepvariant", + "reports/vcftools/deepvariant/test", + "reports/vcftools/deepvariant/test/test.deepvariant.012", + "reports/vcftools/deepvariant/test/test.deepvariant.012.indv", + "reports/vcftools/deepvariant/test/test.deepvariant.012.pos", + "reports/vcftools/deepvariant/test/test.deepvariant.BEAGLE.GL", + "reports/vcftools/deepvariant/test/test.deepvariant.BEAGLE.PL", + "reports/vcftools/deepvariant/test/test.deepvariant.FILTER.summary", + "reports/vcftools/deepvariant/test/test.deepvariant.FORMAT", + "reports/vcftools/deepvariant/test/test.deepvariant.INFO", + "reports/vcftools/deepvariant/test/test.deepvariant.LROH", + "reports/vcftools/deepvariant/test/test.deepvariant.Tajima.D", + "reports/vcftools/deepvariant/test/test.deepvariant.TsTv", + "reports/vcftools/deepvariant/test/test.deepvariant.TsTv.count", + "reports/vcftools/deepvariant/test/test.deepvariant.TsTv.qual", + "reports/vcftools/deepvariant/test/test.deepvariant.TsTv.summary", + "reports/vcftools/deepvariant/test/test.deepvariant.bcf", + "reports/vcftools/deepvariant/test/test.deepvariant.diff.discordance.matrix", + "reports/vcftools/deepvariant/test/test.deepvariant.diff.indv", + "reports/vcftools/deepvariant/test/test.deepvariant.diff.indv_in_files", + "reports/vcftools/deepvariant/test/test.deepvariant.diff.sites", + 
"reports/vcftools/deepvariant/test/test.deepvariant.diff.sites_in_files", + "reports/vcftools/deepvariant/test/test.deepvariant.diff.switch", + "reports/vcftools/deepvariant/test/test.deepvariant.frq", + "reports/vcftools/deepvariant/test/test.deepvariant.frq.count", + "reports/vcftools/deepvariant/test/test.deepvariant.gdepth", + "reports/vcftools/deepvariant/test/test.deepvariant.geno.chisq", + "reports/vcftools/deepvariant/test/test.deepvariant.geno.ld", + "reports/vcftools/deepvariant/test/test.deepvariant.hap.ld", + "reports/vcftools/deepvariant/test/test.deepvariant.hapcount", + "reports/vcftools/deepvariant/test/test.deepvariant.het", + "reports/vcftools/deepvariant/test/test.deepvariant.hwe", + "reports/vcftools/deepvariant/test/test.deepvariant.idepth", + "reports/vcftools/deepvariant/test/test.deepvariant.ifreqburden", + "reports/vcftools/deepvariant/test/test.deepvariant.imiss", + "reports/vcftools/deepvariant/test/test.deepvariant.impute.hap", + "reports/vcftools/deepvariant/test/test.deepvariant.impute.hap.indv", + "reports/vcftools/deepvariant/test/test.deepvariant.impute.hap.legend", + "reports/vcftools/deepvariant/test/test.deepvariant.indel.hist", + "reports/vcftools/deepvariant/test/test.deepvariant.interchrom.geno.ld", + "reports/vcftools/deepvariant/test/test.deepvariant.interchrom.hap.ld", + "reports/vcftools/deepvariant/test/test.deepvariant.kept.sites", + "reports/vcftools/deepvariant/test/test.deepvariant.ldepth", + "reports/vcftools/deepvariant/test/test.deepvariant.ldepth.mean", + "reports/vcftools/deepvariant/test/test.deepvariant.ldhat.locs", + "reports/vcftools/deepvariant/test/test.deepvariant.ldhat.sites", + "reports/vcftools/deepvariant/test/test.deepvariant.list.geno.ld", + "reports/vcftools/deepvariant/test/test.deepvariant.list.hap.ld", + "reports/vcftools/deepvariant/test/test.deepvariant.lmiss", + "reports/vcftools/deepvariant/test/test.deepvariant.lqual", + "reports/vcftools/deepvariant/test/test.deepvariant.map", + 
"reports/vcftools/deepvariant/test/test.deepvariant.mendel", + "reports/vcftools/deepvariant/test/test.deepvariant.ped", + "reports/vcftools/deepvariant/test/test.deepvariant.relatedness", + "reports/vcftools/deepvariant/test/test.deepvariant.relatedness2", + "reports/vcftools/deepvariant/test/test.deepvariant.removed.sites", + "reports/vcftools/deepvariant/test/test.deepvariant.singletons", + "reports/vcftools/deepvariant/test/test.deepvariant.sites.pi", + "reports/vcftools/deepvariant/test/test.deepvariant.snpden", + "reports/vcftools/deepvariant/test/test.deepvariant.tfam", + "reports/vcftools/deepvariant/test/test.deepvariant.tped", + "reports/vcftools/deepvariant/test/test.deepvariant.vcf", + "reports/vcftools/deepvariant/test/test.deepvariant.weir.fst", + "reports/vcftools/deepvariant/test/test.deepvariant.windowed.pi", + "variant_calling", + "variant_calling/deepvariant", + "variant_calling/deepvariant/test", + "variant_calling/deepvariant/test/test.deepvariant.g.vcf.gz", + "variant_calling/deepvariant/test/test.deepvariant.g.vcf.gz.tbi", + "variant_calling/deepvariant/test/test.deepvariant.vcf.gz", + "variant_calling/deepvariant/test/test.deepvariant.vcf.gz.tbi" + ], + [ + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." 
+ ] + ], + "timestamp": "2026-04-08T12:20:26.174510128", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.2" + } + }, + "-profile test --tools deepvariant --input tests/csv/3.0/mapped_single_cram.csv --no_intervals": { + "content": [ + 9, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "DEEPVARIANT_RUNDEEPVARIANT": { + "deepvariant": "1.9.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + 
"multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + 
"multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/deepvariant", + "reports/bcftools/deepvariant/test", + "reports/bcftools/deepvariant/test/test.deepvariant.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + 
"reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/deepvariant", + "reports/vcftools/deepvariant/test", + "reports/vcftools/deepvariant/test/test.deepvariant.FILTER.summary", + "reports/vcftools/deepvariant/test/test.deepvariant.TsTv.count", + "reports/vcftools/deepvariant/test/test.deepvariant.TsTv.qual", + "variant_calling", + "variant_calling/deepvariant", + "variant_calling/deepvariant/test", + "variant_calling/deepvariant/test/test.deepvariant.g.vcf.gz", + "variant_calling/deepvariant/test/test.deepvariant.g.vcf.gz.tbi", + "variant_calling/deepvariant/test/test.deepvariant.vcf.gz", + "variant_calling/deepvariant/test/test.deepvariant.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,115b9a93bcf511419e6622919172f321", + "mosdepth-cumcoverage-dist-id.txt:md5,df2fc825dfc41e30f72f5d125a6447ee", + "mosdepth_perchrom.txt:md5,115b9a93bcf511419e6622919172f321", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,06abce459a3e3192252b6196ae3b189e", + "samtools_alignment_plot.txt:md5,3622ecb71829cc1c581157e403048fd1", + "test.deepvariant.bcftools_stats.txt:md5,a19ff85c0bc9796ace876f3a9c34c6ce", + "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "test.deepvariant.FILTER.summary:md5,d0fe196a8062d7d67d9018cb40f5490c", + "test.deepvariant.TsTv.count:md5,2851ce55f9813280515c2fdcacb86844" + ], + "No BAM files", + "No CRAM files", + [ + "test.deepvariant.g.vcf.gz:md5,706cb23dc04a7e1055f3e5739666615", + "test.deepvariant.vcf.gz:md5,dc50f0a5b27f3ade02c745e69e69ad29" 
+ ], + "No warnings" + ], + "timestamp": "2025-10-02T10:45:45.83448312", + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + } + } +} \ No newline at end of file diff --git a/tests/variant_calling_freebayes.nf.test b/tests/variant_calling_freebayes.nf.test new file mode 100644 index 0000000000..d1f4fb07b7 --- /dev/null +++ b/tests/variant_calling_freebayes.nf.test @@ -0,0 +1,94 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools freebayes --wes --nucleotides_per_second 20", + params: [ + intervals : modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', + tools : 'freebayes', + wes : true, + nucleotides_per_second : 20 + ], + include_freebayes_unfiltered: true, + ignoreFiles: '**/*.freebayes.vcf{,.gz}' + ], + [ + name: "-profile test --tools freebayes --no_intervals", + params: [ + tools : 'freebayes', + no_intervals : true + ], + include_freebayes_unfiltered: true, + ignoreFiles: '**/*.freebayes.vcf{,.gz}' + ], + [ + name: "-profile test --tools freebayes --wes --nucleotides_per_second 20 --input fastq_pair.csv", + params: [ + input : "${projectDir}/tests/csv/3.0/fastq_pair.csv", + intervals : modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', + tools : 'freebayes', + wes : true, + nucleotides_per_second : 20 + ], + include_freebayes_unfiltered: true, + ignoreFiles: '**/*.freebayes.vcf{,.gz}' + ], + [ + name: "-profile test --tools freebayes --no_intervals --wes --input fastq_pair.csv", + params: [ + input : "${projectDir}/tests/csv/3.0/fastq_pair.csv", + tools : 'freebayes', + no_intervals : true, + wes : true + ], + include_freebayes_unfiltered: true, + ignoreFiles: '**/*.freebayes.vcf{,.gz}', + // instability in QUAL and genotype scores + no_vcf_md5sum: 
true, + ], + [ + name: "-profile test,tools_tumoronly --tools freebayes --input recalibrated_tumoronly.csv", + params: [ + fasta : modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai : modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals : modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input : "${projectDir}/tests/csv/3.0/recalibrated_tumoronly.csv", + step : "variant_calling", + tools : 'freebayes', + wes : true, + nucleotides_per_second : 20, + genome : null, + igenomes_ignore : true + ], + include_freebayes_unfiltered: true, + ignoreFiles: '**/*.freebayes.vcf{,.gz}' + ], + [ + name: "-profile test --tools freebayes --no_intervals --input recalibrated_tumoronly.csv", + params: [ + fasta : modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai : modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input : "${projectDir}/tests/csv/3.0/recalibrated_tumoronly.csv", + step : "variant_calling", + tools : 'freebayes', + wes : true, + nucleotides_per_second : 20, + genome : null, + igenomes_ignore : true, + no_intervals : true + ], + include_freebayes_unfiltered: true, + ignoreFiles: '**/*.freebayes.vcf{,.gz}' + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_freebayes.nf.test.snap b/tests/variant_calling_freebayes.nf.test.snap new file mode 100644 index 0000000000..5b4e7c44d5 --- /dev/null +++ b/tests/variant_calling_freebayes.nf.test.snap @@ -0,0 +1,1936 @@ +{ + "-profile test,tools_tumoronly --tools freebayes --input recalibrated_tumoronly.csv": { + "content": [ + 19, + { + "BCFTOOLS_SORT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FREEBAYES": { + "freebayes": 
"1.3.10" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "MERGE_FREEBAYES": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, + "VCFLIB_VCFFILTER": { + "vcflib": "1.0.14" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + 
"multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + 
"multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/freebayes", + "reports/bcftools/freebayes/sample2", + "reports/bcftools/freebayes/sample2/sample2.freebayes.filtered.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample2", + 
"reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/freebayes", + "reports/vcftools/freebayes/sample2", + "reports/vcftools/freebayes/sample2/sample2.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/sample2/sample2.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/sample2/sample2.freebayes.filtered.TsTv.qual", + "variant_calling", + "variant_calling/freebayes", + "variant_calling/freebayes/sample2", + "variant_calling/freebayes/sample2/sample2.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/sample2/sample2.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz", + "variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,1f24f2f40467234c410a8bda544a8aae", + "mosdepth-cumcoverage-dist-id.txt:md5,1036ea76acae803f591fd99838a8eded", + "mosdepth_perchrom.txt:md5,1f24f2f40467234c410a8bda544a8aae", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", + "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", + "sample2.freebayes.filtered.bcftools_stats.txt:md5,58e57fe87d5eb8b7ccd9b1ecd2d196df", + "sample2.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample2.recal.mosdepth.region.dist.txt:md5,39005ffaac22871ffaaf19656fe69c5b", + 
"sample2.recal.mosdepth.summary.txt:md5,68d4b98f17361fddf73052ead34fa370", + "sample2.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample2.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample2.recal.regions.bed.gz:md5,b7561bc56a955f7db0f11e67e2ec0386", + "sample2.recal.regions.bed.gz.csi:md5,393c2749068304d8545b501b9d4658e4", + "sample2.freebayes.filtered.FILTER.summary:md5,0a8ab31642123f2fc365df90166f22d5", + "sample2.freebayes.filtered.TsTv.count:md5,1756160098a27e7537c0fa8ea5cffc5f" + ], + "No BAM files", + "No CRAM files", + [ + [ + "sample2.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=15342, phased=false, phasedAutodetect=false]" + ] + ], + [ + "sample2.freebayes.filtered.vcf.gz:md5,7ceeddfa65d53e50e6faeb39fd4f8133" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-04T12:14:50.842752" + }, + "-profile test --tools freebayes --no_intervals --wes --input fastq_pair.csv": { + "content": [ + 41, + { + "BCFTOOLS_SORT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FREEBAYES": { + "freebayes": "1.3.10" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_VC_FREEBAYES": { + "tabix": 1.21 + }, + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, + "VCFLIB_VCFFILTER": { + "vcflib": "1.0.14" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + 
"csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + 
"multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + 
"multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + 
"multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + 
"multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + 
"multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/markduplicates/test2", + "preprocessing/markduplicates/test2/test2.md.cram", + "preprocessing/markduplicates/test2/test2.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recal_table/test2", + "preprocessing/recal_table/test2/test2.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "preprocessing/recalibrated/test2", + "preprocessing/recalibrated/test2/test2.recal.cram", + "preprocessing/recalibrated/test2/test2.recal.cram.crai", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/freebayes", + "reports/bcftools/freebayes/test", + "reports/bcftools/freebayes/test/test.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/freebayes/test2_vs_test", + "reports/bcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.bcftools_stats.txt", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test2-test_L1", + "reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.html", + "reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.zip", + "reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.html", + 
"reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/markduplicates/test2", + "reports/markduplicates/test2/test2.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.per-base.bed.gz", + "reports/mosdepth/test/test.md.per-base.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + "reports/mosdepth/test2", + "reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt", + "reports/mosdepth/test2/test2.md.mosdepth.summary.txt", + "reports/mosdepth/test2/test2.md.per-base.bed.gz", + "reports/mosdepth/test2/test2.md.per-base.bed.gz.csi", + "reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test2/test2.recal.mosdepth.summary.txt", + "reports/mosdepth/test2/test2.recal.per-base.bed.gz", + "reports/mosdepth/test2/test2.recal.per-base.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/samtools/test2", + "reports/samtools/test2/test2.md.cram.stats", + "reports/samtools/test2/test2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/freebayes", + "reports/vcftools/freebayes/test", + "reports/vcftools/freebayes/test/test.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.qual", + "reports/vcftools/freebayes/test2_vs_test", + "reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.FILTER.summary", + 
"reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.TsTv.qual", + "variant_calling", + "variant_calling/freebayes", + "variant_calling/freebayes/test", + "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/test/test.freebayes.vcf.gz", + "variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi", + "variant_calling/freebayes/test2_vs_test", + "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz", + "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz.tbi" + ], + [ + "fastqc-status-check-heatmap.txt:md5,eeb4e7e7a45f4223c86bfe3aea81f90b", + "fastqc_adapter_content_plot.txt:md5,cc7a809f9f001c10646ee4199ccdb40f", + "fastqc_per_base_n_content_plot.txt:md5,1eba855ae0fa5b5ed4a1f90d1c97f759", + "fastqc_per_base_sequence_quality_plot.txt:md5,cbb2743dfb2ec74e72b578c83ec28ee8", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,73c884822eba0bafcdf34b90fe81aec5", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,24eeb00e5e2b11c7ab90a3223d429d15", + "fastqc_per_sequence_quality_scores_plot.txt:md5,6f048594f02effb93608665be29bd35a", + "fastqc_sequence_counts_plot.txt:md5,fca7ee9ef3382e2837a302d8c5d33769", + "fastqc_sequence_duplication_levels_plot.txt:md5,2aa0c6f33e4cffbb29cdabe2c28bb097", + "fastqc_sequence_length_distribution_plot.txt:md5,61b1fe978a2c73b86c30c27ee4bc60ae", + "mosdepth-coverage-per-contig-single.txt:md5,5e1e538631fd3e6e45158f9edf33ee8d", + "mosdepth-cumcoverage-dist-id.txt:md5,34725df04f598cc54a81ec32f8c7ae41", + "mosdepth_perchrom.txt:md5,5e1e538631fd3e6e45158f9edf33ee8d", + 
"multiqc_citations.txt:md5,ace4ca89138a5f1e2be289c157c00bd9", + "multiqc_fastqc.txt:md5,2fd25e8c81f962594b801d5a9df3cd87", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,7a0481b59cdd57cc0b8bc9b5641614c6", + "samtools_alignment_plot.txt:md5,8e6178a26fe2a4fc4f45fac3175ba6c6", + "test.freebayes.filtered.bcftools_stats.txt:md5,dde124ceaf6f109cd274b837b950096b", + "test2_vs_test.freebayes.filtered.bcftools_stats.txt:md5,6f942caec2f4f02c69e1216226fa44a9", + "test.md.mosdepth.global.dist.txt:md5,5a0679057c530e5945c9c5a3a17312dc", + "test.md.mosdepth.summary.txt:md5,0010c2396a3173c7cf4983abe2eb6a4c", + "test.md.per-base.bed.gz:md5,34dfe443c0a0767562dd65272e3310ef", + "test.md.per-base.bed.gz.csi:md5,b0ab630c3241fbd7581b7a38d944ff8b", + "test.recal.mosdepth.global.dist.txt:md5,5a0679057c530e5945c9c5a3a17312dc", + "test.recal.mosdepth.summary.txt:md5,0010c2396a3173c7cf4983abe2eb6a4c", + "test.recal.per-base.bed.gz:md5,34dfe443c0a0767562dd65272e3310ef", + "test.recal.per-base.bed.gz.csi:md5,b0ab630c3241fbd7581b7a38d944ff8b", + "test2.md.mosdepth.global.dist.txt:md5,f25166c3a0051bb4d8c11a210278de6c", + "test2.md.mosdepth.summary.txt:md5,d5e4084de2ea2a0a7b60b2d71c804d4b", + "test2.md.per-base.bed.gz:md5,e1c6d60621d8f64aaf28fa1c1ddda921", + "test2.md.per-base.bed.gz.csi:md5,4205a09ede17cdbdaad45e3553f73105", + "test2.recal.mosdepth.global.dist.txt:md5,f25166c3a0051bb4d8c11a210278de6c", + "test2.recal.mosdepth.summary.txt:md5,d5e4084de2ea2a0a7b60b2d71c804d4b", + "test2.recal.per-base.bed.gz:md5,e1c6d60621d8f64aaf28fa1c1ddda921", + "test2.recal.per-base.bed.gz.csi:md5,4205a09ede17cdbdaad45e3553f73105", + 
"test.freebayes.filtered.FILTER.summary:md5,87e753ba2ad969475fb55661852f75e0", + "test.freebayes.filtered.TsTv.count:md5,845f64e5bb4224af98f3a47294cd5483", + "test2_vs_test.freebayes.filtered.FILTER.summary:md5,126e83dcd37b82420f7c5d7b235479f1", + "test2_vs_test.freebayes.filtered.TsTv.count:md5,28919c7d29c998681391d2027af3e0f9" + ], + "No BAM files", + [ + "test.md.cram:md5,59ecc5c82c7af1283eea7507c590c831", + "test2.md.cram:md5,bac87cf9290577fd9a4def63e046031f", + "test.recal.cram:md5,59ecc5c82c7af1283eea7507c590c831", + "test2.recal.cram:md5,bac87cf9290577fd9a4def63e046031f" + ], + [ + [ + "test.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=732, phased=false, phasedAutodetect=false]" + ], + [ + "test2_vs_test.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=1447, phased=false, phasedAutodetect=false]" + ] + ], + [ + [ + "test.freebayes.filtered.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=107, phased=false, phasedAutodetect=false]" + ], + [ + "test2_vs_test.freebayes.filtered.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=137, phased=false, phasedAutodetect=false]" + ] + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998513#1/1", + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T22:43:02.382544129" + }, + "-profile test --tools freebayes --wes --nucleotides_per_second 20": { + "content": [ + 33, + { + "BCFTOOLS_SORT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + 
"CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FREEBAYES": { + "freebayes": "1.3.10" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_GATHERBQSRREPORTS": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MERGE_CRAM": { + "samtools": 1.21 + }, + "MERGE_FREEBAYES": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, + "VCFLIB_VCFFILTER": { + "vcflib": "1.0.14" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + 
"multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + 
"multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + 
"multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + 
"multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/freebayes", + "reports/bcftools/freebayes/test", + "reports/bcftools/freebayes/test/test.freebayes.filtered.bcftools_stats.txt", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + 
"reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test-test_L2", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.zip", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.per-base.bed.gz", + "reports/mosdepth/test/test.md.per-base.bed.gz.csi", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/freebayes", + "reports/vcftools/freebayes/test", + "reports/vcftools/freebayes/test/test.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.qual", + "variant_calling", + "variant_calling/freebayes", + "variant_calling/freebayes/test", + 
"variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/test/test.freebayes.vcf.gz", + "variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi" + ], + [ + "fastqc-status-check-heatmap.txt:md5,a020b9689ddeb4abec16b4854fe452f1", + "fastqc_adapter_content_plot.txt:md5,2e1b72be741319e7fadbbb39d7e5b37d", + "fastqc_per_base_n_content_plot.txt:md5,ad3b971a6bb4e8ba6c844c8a03584eb8", + "fastqc_per_base_sequence_quality_plot.txt:md5,1bc03889d243a944253ac637d81ae10c", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,2c42d140ce06c08dad2b58f397c23239", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,59e22821d350bfb97c37ffd9088f5ad9", + "fastqc_per_sequence_quality_scores_plot.txt:md5,f33615cc98bb6225f39545a415fa7c0f", + "fastqc_sequence_counts_plot.txt:md5,7f0f19a58e8e54e792a751fd04a9ae13", + "fastqc_sequence_duplication_levels_plot.txt:md5,92b02e250ff78725deb9a10d510fcecc", + "fastqc_sequence_length_distribution_plot.txt:md5,fb04dce68ec566314125bc9438211b28", + "mosdepth-coverage-per-contig-single.txt:md5,c9dd01d00f91c6483202dcde61aa1f67", + "mosdepth-cumcoverage-dist-id.txt:md5,00c547f15d022eb446ec6367739c81b8", + "mosdepth_perchrom.txt:md5,c9dd01d00f91c6483202dcde61aa1f67", + "multiqc_citations.txt:md5,ace4ca89138a5f1e2be289c157c00bd9", + "multiqc_fastqc.txt:md5,bde0d0bffa62228b33fb68b7e25b6ff8", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,300b95526a211b05f18aaffd037dacd3", + "samtools_alignment_plot.txt:md5,5053f650b8612fe5e8527b0bca8ab905", + "test.freebayes.filtered.bcftools_stats.txt:md5,5ad7fc8a51dd1dc0f827a46acbed01c4", + 
"test.md.mosdepth.global.dist.txt:md5,531a83245143e7975f18e1988c876138", + "test.md.mosdepth.region.dist.txt:md5,d25723bdd3fec6b17d2462abfa097b9e", + "test.md.mosdepth.summary.txt:md5,87be70cd1237d7af9aa40d8cd8b3a817", + "test.md.per-base.bed.gz:md5,c53d26b767b6e75b3e502438a77f89b2", + "test.md.per-base.bed.gz.csi:md5,c3066b00781e14a9db5fc0bf0d47d777", + "test.md.regions.bed.gz:md5,f96fa1cdae548eb7e54ce6a481d928b9", + "test.md.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", + "test.recal.mosdepth.global.dist.txt:md5,a3e6c8f6d4b5e909d0527be83a93fbae", + "test.recal.mosdepth.region.dist.txt:md5,d25723bdd3fec6b17d2462abfa097b9e", + "test.recal.mosdepth.summary.txt:md5,ca5424a709268a61200a2dc2865f1a14", + "test.recal.per-base.bed.gz:md5,8aaf9cb3dd5c9643e77aba91293fc39d", + "test.recal.per-base.bed.gz.csi:md5,d8038c7d544abd5d6335f2541de4e769", + "test.recal.regions.bed.gz:md5,f96fa1cdae548eb7e54ce6a481d928b9", + "test.recal.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", + "test.freebayes.filtered.FILTER.summary:md5,259470a9823d503d5639946162b2ed19", + "test.freebayes.filtered.TsTv.count:md5,162253eb6c406300678985b3ac7dc868" + ], + "No BAM files", + [ + "test.md.cram:md5,724c601c9daf019d356a53a7d5e1c8b1", + "test.recal.cram:md5,e0b8589bcc82aecad1fbbc256e85a6ee" + ], + [ + [ + "test.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=475, phased=false, phasedAutodetect=false]" + ] + ], + [ + "test.freebayes.filtered.vcf.gz:md5,2062214d7062a3cf37ea4c1d123c1a43" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T22:35:44.029764132" + }, + "-profile test --tools freebayes --no_intervals": { + "content": [ + 24, + { + "BCFTOOLS_SORT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + 
"BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FREEBAYES": { + "freebayes": "1.3.10" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_VC_FREEBAYES": { + "tabix": 1.21 + }, + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, + "VCFLIB_VCFFILTER": { + "vcflib": "1.0.14" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + 
"multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + "multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", 
+ "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + 
"multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + 
"multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/freebayes", + "reports/bcftools/freebayes/test", + "reports/bcftools/freebayes/test/test.freebayes.filtered.bcftools_stats.txt", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + 
"reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test-test_L2", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_1_fastqc.zip", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.html", + "reports/fastqc/test-test_L2/test-test_L2_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/freebayes", + "reports/vcftools/freebayes/test", + "reports/vcftools/freebayes/test/test.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.qual", + "variant_calling", + "variant_calling/freebayes", + "variant_calling/freebayes/test", + "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/test/test.freebayes.vcf.gz", + "variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi" + ], + [ + 
"fastqc-status-check-heatmap.txt:md5,a020b9689ddeb4abec16b4854fe452f1", + "fastqc_adapter_content_plot.txt:md5,2e1b72be741319e7fadbbb39d7e5b37d", + "fastqc_per_base_n_content_plot.txt:md5,ad3b971a6bb4e8ba6c844c8a03584eb8", + "fastqc_per_base_sequence_quality_plot.txt:md5,1bc03889d243a944253ac637d81ae10c", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,2c42d140ce06c08dad2b58f397c23239", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,59e22821d350bfb97c37ffd9088f5ad9", + "fastqc_per_sequence_quality_scores_plot.txt:md5,f33615cc98bb6225f39545a415fa7c0f", + "fastqc_sequence_counts_plot.txt:md5,7f0f19a58e8e54e792a751fd04a9ae13", + "fastqc_sequence_duplication_levels_plot.txt:md5,92b02e250ff78725deb9a10d510fcecc", + "fastqc_sequence_length_distribution_plot.txt:md5,fb04dce68ec566314125bc9438211b28", + "mosdepth-coverage-per-contig-single.txt:md5,a0990e98cdd2cd5540d07f504f516ccf", + "mosdepth-cumcoverage-dist-id.txt:md5,caee7b9e5d1a451970f87d791c3e450b", + "mosdepth_perchrom.txt:md5,a0990e98cdd2cd5540d07f504f516ccf", + "multiqc_citations.txt:md5,ace4ca89138a5f1e2be289c157c00bd9", + "multiqc_fastqc.txt:md5,bde0d0bffa62228b33fb68b7e25b6ff8", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,9e836f09043529495de9e1c268ee8e82", + "samtools_alignment_plot.txt:md5,bcc2f176a4bc51b33a36bd4381f048a5", + "test.freebayes.filtered.bcftools_stats.txt:md5,c6b6e221504c69ee75b209f4a0b2506a", + "test.md.mosdepth.global.dist.txt:md5,ef7c375ae07aec5540f9892b9b556b73", + "test.md.mosdepth.region.dist.txt:md5,212efff2213f6fc1c3204daf68bbb8c8", + "test.md.mosdepth.summary.txt:md5,72114393647ff64503522760218b30f0", + 
"test.md.regions.bed.gz:md5,985db429051ddcd5eae177da6fb55ad6", + "test.md.regions.bed.gz.csi:md5,3fa0f8272fefafe3cd840376d34a94a2", + "test.recal.mosdepth.global.dist.txt:md5,ef7c375ae07aec5540f9892b9b556b73", + "test.recal.mosdepth.region.dist.txt:md5,212efff2213f6fc1c3204daf68bbb8c8", + "test.recal.mosdepth.summary.txt:md5,72114393647ff64503522760218b30f0", + "test.recal.regions.bed.gz:md5,985db429051ddcd5eae177da6fb55ad6", + "test.recal.regions.bed.gz.csi:md5,3fa0f8272fefafe3cd840376d34a94a2", + "test.freebayes.filtered.FILTER.summary:md5,9ae0931339e231f90a4b5c330f7f6d55", + "test.freebayes.filtered.TsTv.count:md5,845f64e5bb4224af98f3a47294cd5483" + ], + "No BAM files", + [ + "test.md.cram:md5,724c601c9daf019d356a53a7d5e1c8b1", + "test.recal.cram:md5,724c601c9daf019d356a53a7d5e1c8b1" + ], + [ + [ + "test.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=738, phased=false, phasedAutodetect=false]" + ] + ], + [ + "test.freebayes.filtered.vcf.gz:md5,bf085c88aa26191a55fbd23bff6a498f" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T22:38:03.564960868" + }, + "-profile test --tools freebayes --no_intervals --input recalibrated_tumoronly.csv": { + "content": [ + 14, + { + "BCFTOOLS_SORT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "FREEBAYES": { + "freebayes": "1.3.10" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_VC_FREEBAYES": { + "tabix": 1.21 + }, + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, + "VCFLIB_VCFFILTER": { + "vcflib": "1.0.14" + }, + "VCFTOOLS_TSTV_COUNT": { + 
"vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + 
"multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/freebayes", + "reports/bcftools/freebayes/sample2", + "reports/bcftools/freebayes/sample2/sample2.freebayes.filtered.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/freebayes", + "reports/vcftools/freebayes/sample2", + 
"reports/vcftools/freebayes/sample2/sample2.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/sample2/sample2.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/sample2/sample2.freebayes.filtered.TsTv.qual", + "variant_calling", + "variant_calling/freebayes", + "variant_calling/freebayes/sample2", + "variant_calling/freebayes/sample2/sample2.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/sample2/sample2.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz", + "variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,a0266cd06dd6f7f5463f09e3cb8af71d", + "mosdepth-cumcoverage-dist-id.txt:md5,88b94dd2dcc423983da65125ece7651e", + "mosdepth_perchrom.txt:md5,a0266cd06dd6f7f5463f09e3cb8af71d", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", + "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", + "sample2.freebayes.filtered.bcftools_stats.txt:md5,b83440f3699a3258015d0ce60eea0d46", + "sample2.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample2.recal.mosdepth.summary.txt:md5,0a7300e56eda6fba7c7564f00aa000f0", + "sample2.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample2.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample2.freebayes.filtered.FILTER.summary:md5,fac42829b9b347413acbfc9c0a17fef0", + "sample2.freebayes.filtered.TsTv.count:md5,f864a8b1d6bab1d5f877b60dde36863f" + ], + "No BAM files", + "No CRAM files", + [ + [ + "sample2.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=1, variantCount=15485, phased=false, phasedAutodetect=false]" + ] + ], + [ + "sample2.freebayes.filtered.vcf.gz:md5,e68ce412428016fcc1f09735a91cdef" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-04T12:22:29.186457" 
+ }, + "-profile test --tools freebayes --wes --nucleotides_per_second 20 --input fastq_pair.csv": { + "content": [ + 56, + { + "BCFTOOLS_SORT": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "BWAMEM1_MEM": { + "bwa": "0.7.18-r1243-dirty", + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FREEBAYES": { + "freebayes": "1.3.10" + }, + "GATK4_APPLYBQSR": { + "gatk4": "4.6.1.0" + }, + "GATK4_BASERECALIBRATOR": { + "gatk4": "4.6.1.0" + }, + "GATK4_GATHERBQSRREPORTS": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.6.1.0", + "samtools": 1.21 + }, + "INDEX_CRAM": { + "samtools": 1.21 + }, + "MERGE_CRAM": { + "samtools": 1.21 + }, + "MERGE_FREEBAYES": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_VC_FREEBAYES_FILT": { + "tabix": 1.21 + }, + "VCFLIB_VCFFILTER": { + "vcflib": "1.0.14" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/markduplicates.csv", + "csv/markduplicates_no_table.csv", + "csv/recalibrated.csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_adapter_content_plot.txt", + 
"multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.txt", + "multiqc/multiqc_data/gatk-base-recalibrator-reported-empirical-plot.txt", + "multiqc/multiqc_data/gatk_base_recalibrator.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_picard_dups.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/picard_MarkIlluminaAdapters_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram.txt", + "multiqc/multiqc_data/picard_MeanQualityByCycle_histogram_1.txt", + "multiqc/multiqc_data/picard_QualityScoreDistribution_histogram.txt", + 
"multiqc/multiqc_data/picard_deduplication.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_adapter_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.pdf", + "multiqc/multiqc_plots/pdf/gatk-base-recalibrator-reported-empirical-plot.pdf", + 
"multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-cnt.pdf", + "multiqc/multiqc_plots/pdf/picard_deduplication-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_adapter_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + 
"multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.png", + "multiqc/multiqc_plots/png/gatk-base-recalibrator-reported-empirical-plot.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/picard_deduplication-cnt.png", + "multiqc/multiqc_plots/png/picard_deduplication-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_adapter_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + 
"multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Count.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-quality-scores-plot_Pre-recalibration_Percent.svg", + "multiqc/multiqc_plots/svg/gatk-base-recalibrator-reported-empirical-plot.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-cnt.svg", + "multiqc/multiqc_plots/svg/picard_deduplication-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/markduplicates", + "preprocessing/markduplicates/test", + "preprocessing/markduplicates/test/test.md.cram", + "preprocessing/markduplicates/test/test.md.cram.crai", + "preprocessing/markduplicates/test2", + "preprocessing/markduplicates/test2/test2.md.cram", + "preprocessing/markduplicates/test2/test2.md.cram.crai", + "preprocessing/recal_table", + "preprocessing/recal_table/test", + "preprocessing/recal_table/test/test.recal.table", + "preprocessing/recal_table/test2", + 
"preprocessing/recal_table/test2/test2.recal.table", + "preprocessing/recalibrated", + "preprocessing/recalibrated/test", + "preprocessing/recalibrated/test/test.recal.cram", + "preprocessing/recalibrated/test/test.recal.cram.crai", + "preprocessing/recalibrated/test2", + "preprocessing/recalibrated/test2/test2.recal.cram", + "preprocessing/recalibrated/test2/test2.recal.cram.crai", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/freebayes", + "reports/bcftools/freebayes/test", + "reports/bcftools/freebayes/test/test.freebayes.filtered.bcftools_stats.txt", + "reports/bcftools/freebayes/test2_vs_test", + "reports/bcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.bcftools_stats.txt", + "reports/fastqc", + "reports/fastqc/test-test_L1", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_1_fastqc.zip", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.html", + "reports/fastqc/test-test_L1/test-test_L1_2_fastqc.zip", + "reports/fastqc/test2-test_L1", + "reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.html", + "reports/fastqc/test2-test_L1/test2-test_L1_1_fastqc.zip", + "reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.html", + "reports/fastqc/test2-test_L1/test2-test_L1_2_fastqc.zip", + "reports/markduplicates", + "reports/markduplicates/test", + "reports/markduplicates/test/test.md.cram.metrics", + "reports/markduplicates/test2", + "reports/markduplicates/test2/test2.md.cram.metrics", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.md.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.md.mosdepth.summary.txt", + "reports/mosdepth/test/test.md.per-base.bed.gz", + "reports/mosdepth/test/test.md.per-base.bed.gz.csi", + "reports/mosdepth/test/test.md.regions.bed.gz", + "reports/mosdepth/test/test.md.regions.bed.gz.csi", + 
"reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/mosdepth/test2", + "reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt", + "reports/mosdepth/test2/test2.md.mosdepth.region.dist.txt", + "reports/mosdepth/test2/test2.md.mosdepth.summary.txt", + "reports/mosdepth/test2/test2.md.per-base.bed.gz", + "reports/mosdepth/test2/test2.md.per-base.bed.gz.csi", + "reports/mosdepth/test2/test2.md.regions.bed.gz", + "reports/mosdepth/test2/test2.md.regions.bed.gz.csi", + "reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test2/test2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test2/test2.recal.mosdepth.summary.txt", + "reports/mosdepth/test2/test2.recal.per-base.bed.gz", + "reports/mosdepth/test2/test2.recal.per-base.bed.gz.csi", + "reports/mosdepth/test2/test2.recal.regions.bed.gz", + "reports/mosdepth/test2/test2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.md.cram.stats", + "reports/samtools/test/test.recal.cram.stats", + "reports/samtools/test2", + "reports/samtools/test2/test2.md.cram.stats", + "reports/samtools/test2/test2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/freebayes", + "reports/vcftools/freebayes/test", + "reports/vcftools/freebayes/test/test.freebayes.filtered.FILTER.summary", + "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/test/test.freebayes.filtered.TsTv.qual", + "reports/vcftools/freebayes/test2_vs_test", + "reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.FILTER.summary", + 
"reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.TsTv.count", + "reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.TsTv.qual", + "variant_calling", + "variant_calling/freebayes", + "variant_calling/freebayes/test", + "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/test/test.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/test/test.freebayes.vcf.gz", + "variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi", + "variant_calling/freebayes/test2_vs_test", + "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.vcf.gz", + "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.filtered.vcf.gz.tbi", + "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz", + "variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz.tbi" + ], + [ + "fastqc-status-check-heatmap.txt:md5,eeb4e7e7a45f4223c86bfe3aea81f90b", + "fastqc_adapter_content_plot.txt:md5,cc7a809f9f001c10646ee4199ccdb40f", + "fastqc_per_base_n_content_plot.txt:md5,1eba855ae0fa5b5ed4a1f90d1c97f759", + "fastqc_per_base_sequence_quality_plot.txt:md5,cbb2743dfb2ec74e72b578c83ec28ee8", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,73c884822eba0bafcdf34b90fe81aec5", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,24eeb00e5e2b11c7ab90a3223d429d15", + "fastqc_per_sequence_quality_scores_plot.txt:md5,6f048594f02effb93608665be29bd35a", + "fastqc_sequence_counts_plot.txt:md5,fca7ee9ef3382e2837a302d8c5d33769", + "fastqc_sequence_duplication_levels_plot.txt:md5,2aa0c6f33e4cffbb29cdabe2c28bb097", + "fastqc_sequence_length_distribution_plot.txt:md5,61b1fe978a2c73b86c30c27ee4bc60ae", + "mosdepth-coverage-per-contig-single.txt:md5,751cdbc07ca147c86d83a0232c40ef9e", + "mosdepth-cumcoverage-dist-id.txt:md5,d730579d62465113709f6f061629c687", + "mosdepth_perchrom.txt:md5,751cdbc07ca147c86d83a0232c40ef9e", + 
"multiqc_citations.txt:md5,ace4ca89138a5f1e2be289c157c00bd9", + "multiqc_fastqc.txt:md5,2fd25e8c81f962594b801d5a9df3cd87", + "picard_MarkIlluminaAdapters_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_MeanQualityByCycle_histogram_1.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "picard_QualityScoreDistribution_histogram.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "samtools-stats-dp.txt:md5,ff7906db0da2f9c9ba5ce85c34694124", + "samtools_alignment_plot.txt:md5,89d0a6e7076223e9feadbecd794948d5", + "test.freebayes.filtered.bcftools_stats.txt:md5,b313075b3e9854e0b561dd8d754bf097", + "test2_vs_test.freebayes.filtered.bcftools_stats.txt:md5,17df64b64f9235afa93aae6bbe08e283", + "test.md.mosdepth.global.dist.txt:md5,5a0679057c530e5945c9c5a3a17312dc", + "test.md.mosdepth.region.dist.txt:md5,835fdc6fa52cc33e6fb76c0c20a8a6c3", + "test.md.mosdepth.summary.txt:md5,dcc9ab2bf3248903e02d8da87e678977", + "test.md.per-base.bed.gz:md5,34dfe443c0a0767562dd65272e3310ef", + "test.md.per-base.bed.gz.csi:md5,b0ab630c3241fbd7581b7a38d944ff8b", + "test.md.regions.bed.gz:md5,99cc80b920ba574e7d9ef8f59f54f7c6", + "test.md.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", + "test.recal.mosdepth.global.dist.txt:md5,0b3162def977123809598639f7698121", + "test.recal.mosdepth.region.dist.txt:md5,835fdc6fa52cc33e6fb76c0c20a8a6c3", + "test.recal.mosdepth.summary.txt:md5,a8455eb2947de529abfa62b303986e0f", + "test.recal.per-base.bed.gz:md5,c075ccd2b847c7c04061a39717faeb30", + "test.recal.per-base.bed.gz.csi:md5,4816eeb9af254ca40177b08cf11b98d2", + "test.recal.regions.bed.gz:md5,99cc80b920ba574e7d9ef8f59f54f7c6", + "test.recal.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", + "test2.md.mosdepth.global.dist.txt:md5,f25166c3a0051bb4d8c11a210278de6c", + "test2.md.mosdepth.region.dist.txt:md5,3211135329e4077bd9bf0ba488e14371", + 
"test2.md.mosdepth.summary.txt:md5,ce0eb6d33c6d0dc720fbc6d1811abef8", + "test2.md.per-base.bed.gz:md5,e1c6d60621d8f64aaf28fa1c1ddda921", + "test2.md.per-base.bed.gz.csi:md5,4205a09ede17cdbdaad45e3553f73105", + "test2.md.regions.bed.gz:md5,0bb2549180165a99680ba3e453ea312f", + "test2.md.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", + "test2.recal.mosdepth.global.dist.txt:md5,a1ef7e662ce993da4668e804952014ce", + "test2.recal.mosdepth.region.dist.txt:md5,3211135329e4077bd9bf0ba488e14371", + "test2.recal.mosdepth.summary.txt:md5,70ad653c0c98baeeaf5085f1209a7bdb", + "test2.recal.per-base.bed.gz:md5,e992ef845ec91a3612297952a23ba579", + "test2.recal.per-base.bed.gz.csi:md5,8072f447199c60f24b01eede8b557333", + "test2.recal.regions.bed.gz:md5,0bb2549180165a99680ba3e453ea312f", + "test2.recal.regions.bed.gz.csi:md5,c6d1ac97ef4dfe43731c8368d8391cab", + "test.freebayes.filtered.FILTER.summary:md5,449597c35ada505b4cb2530d5260e9d5", + "test.freebayes.filtered.TsTv.count:md5,162253eb6c406300678985b3ac7dc868", + "test2_vs_test.freebayes.filtered.FILTER.summary:md5,0ae7467f2311c1382173d70d8d7efb0b", + "test2_vs_test.freebayes.filtered.TsTv.count:md5,9dc940f98dae9c0b49c9468a491836d4" + ], + "No BAM files", + [ + "test.md.cram:md5,59ecc5c82c7af1283eea7507c590c831", + "test2.md.cram:md5,bac87cf9290577fd9a4def63e046031f", + "test.recal.cram:md5,6a28675f6e294b3822952968d86a4868", + "test2.recal.cram:md5,cf197ddea4e392bfdccc817787fc4eb4" + ], + [ + [ + "test.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=474, phased=false, phasedAutodetect=false]" + ], + [ + "test2_vs_test.freebayes.vcf.gz", + "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=966, phased=false, phasedAutodetect=false]" + ] + ], + [ + "test.freebayes.filtered.vcf.gz:md5,c4793897a38d6781dc512a52d9046be5", + "test2_vs_test.freebayes.filtered.vcf.gz:md5,d1d0916fc56a666bd7637792047b82f8" + ], + [ + "WARN: FASTQ 
file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998513#1/1", + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T22:40:36.195304962" + } +} diff --git a/tests/variant_calling_haplotypecaller.nf.test b/tests/variant_calling_haplotypecaller.nf.test new file mode 100644 index 0000000000..d7abedb47b --- /dev/null +++ b/tests/variant_calling_haplotypecaller.nf.test @@ -0,0 +1,65 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --input mapped_single_bam.csv --tools haplotypecaller --step variant_calling --wes --nucleotides_per_second 20", + params: [ + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/mapped_single_bam.csv", + step: "variant_calling", + tools: 'haplotypecaller', + wes: true, + nucleotides_per_second: 20 + ], + no_conda: true + ], + [ + name: "-profile test --input mapped_single_bam.csv --tools haplotypecaller --step variant_calling --wes --nucleotides_per_second 20 --no_intervals", + params: [ + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/mapped_single_bam.csv", + step: "variant_calling", + tools: 'haplotypecaller', + wes: true, + nucleotides_per_second: 20, + no_intervals: true + ], + no_conda: true + ], + [ + name: "-profile test --input mapped_single_bam.csv --tools haplotypecaller --step variant_calling --wes --nucleotides_per_second 20 --skip_tools 
haplotypecaller_filter", + params: [ + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/mapped_single_bam.csv", + step: "variant_calling", + tools: 'haplotypecaller', + wes: true, + nucleotides_per_second: 20, + skip_tools: 'haplotypecaller_filter' + ] + ], + [ + name: "-profile test --input mapped_single_bam.csv --tools haplotypecaller --step variant_calling --wes --nucleotides_per_second 20 --skip_tools haplotypecaller_filter --no_intervals", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_bam.csv", + step: "variant_calling", + tools: 'haplotypecaller', + wes: true, + nucleotides_per_second: 20, + skip_tools: 'haplotypecaller_filter', + no_intervals: true + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_haplotypecaller.nf.test.snap b/tests/variant_calling_haplotypecaller.nf.test.snap new file mode 100644 index 0000000000..6425bd078f --- /dev/null +++ b/tests/variant_calling_haplotypecaller.nf.test.snap @@ -0,0 +1,730 @@ +{ + "-profile test --input mapped_single_bam.csv --tools haplotypecaller --step variant_calling --wes --nucleotides_per_second 20 --skip_tools haplotypecaller_filter --no_intervals": { + "content": [ + 9, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "GATK4_HAPLOTYPECALLER": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + 
"multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", 
+ "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/haplotypecaller", + "reports/bcftools/haplotypecaller/test", + "reports/bcftools/haplotypecaller/test/test.haplotypecaller.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/haplotypecaller", + "reports/vcftools/haplotypecaller/test", + "reports/vcftools/haplotypecaller/test/test.haplotypecaller.FILTER.summary", + "reports/vcftools/haplotypecaller/test/test.haplotypecaller.TsTv.count", + "reports/vcftools/haplotypecaller/test/test.haplotypecaller.TsTv.qual", + "variant_calling", + "variant_calling/haplotypecaller", + "variant_calling/haplotypecaller/test", + 
"variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz", + "variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,93a8fd4ad5c5a4fcdf9e67a07183c7cb", + "mosdepth-cumcoverage-dist-id.txt:md5,24098c80b7a75e41cd68ef1f2154b7b0", + "mosdepth_perchrom.txt:md5,93a8fd4ad5c5a4fcdf9e67a07183c7cb", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,06abce459a3e3192252b6196ae3b189e", + "samtools_alignment_plot.txt:md5,3622ecb71829cc1c581157e403048fd1", + "test.haplotypecaller.bcftools_stats.txt:md5,1497941f37b14c39a24490a50a97e365", + "test.recal.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e", + "test.recal.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59", + "test.recal.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa", + "test.recal.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4", + "test.haplotypecaller.FILTER.summary:md5,87a84b5f8ac3d3cbeeef7d60afcdbfe7", + "test.haplotypecaller.TsTv.count:md5,b77c120ee5cc0423267200c67d60c663" + ], + "No BAM files", + "No CRAM files", + [ + "test.haplotypecaller.vcf.gz:md5,9d9d103327d59d17e778b663b56136fb" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T22:57:09.952875456" + }, + "-profile test --input mapped_single_bam.csv --tools haplotypecaller --step variant_calling --wes --nucleotides_per_second 20 --skip_tools haplotypecaller_filter": { + "content": [ + 14, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_HAPLOTYPECALLER": { + "gatk4": "4.6.1.0" + }, + "MERGE_HAPLOTYPECALLER": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": 
"1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + 
"multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/haplotypecaller", + "reports/bcftools/haplotypecaller/test", + "reports/bcftools/haplotypecaller/test/test.haplotypecaller.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/haplotypecaller", + 
"reports/vcftools/haplotypecaller/test", + "reports/vcftools/haplotypecaller/test/test.haplotypecaller.FILTER.summary", + "reports/vcftools/haplotypecaller/test/test.haplotypecaller.TsTv.count", + "reports/vcftools/haplotypecaller/test/test.haplotypecaller.TsTv.qual", + "variant_calling", + "variant_calling/haplotypecaller", + "variant_calling/haplotypecaller/test", + "variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz", + "variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,6e1f4a26793f1912cd83cee7259635c6", + "mosdepth-cumcoverage-dist-id.txt:md5,cff5ad3df703fd30cd655ef53caf75fb", + "mosdepth_perchrom.txt:md5,6e1f4a26793f1912cd83cee7259635c6", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,06abce459a3e3192252b6196ae3b189e", + "samtools_alignment_plot.txt:md5,3622ecb71829cc1c581157e403048fd1", + "test.haplotypecaller.bcftools_stats.txt:md5,1497941f37b14c39a24490a50a97e365", + "test.recal.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e", + "test.recal.mosdepth.region.dist.txt:md5,3a2030e5e8af7bc12720c3a5592bf921", + "test.recal.mosdepth.summary.txt:md5,615c5c5019d88045a9ff5bbe6e63d270", + "test.recal.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa", + "test.recal.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4", + "test.recal.regions.bed.gz:md5,0c8215fbea7b0bf7aba9d1781575f905", + "test.recal.regions.bed.gz.csi:md5,5c00a1d457c387d6e71848a6d897e309", + "test.haplotypecaller.FILTER.summary:md5,87a84b5f8ac3d3cbeeef7d60afcdbfe7", + "test.haplotypecaller.TsTv.count:md5,b77c120ee5cc0423267200c67d60c663" + ], + "No BAM files", + "No CRAM files", + [ + "test.haplotypecaller.vcf.gz:md5,9d9d103327d59d17e778b663b56136fb" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T22:56:07.158030738" + }, + "-profile test --input mapped_single_bam.csv --tools 
haplotypecaller --step variant_calling --wes --nucleotides_per_second 20 --no_intervals": { + "content": [ + 11, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CNNSCOREVARIANTS": { + "gatk4": "4.5.0.0" + }, + "FILTERVARIANTTRANCHES": { + "gatk4": "4.6.1.0" + }, + "GATK4_HAPLOTYPECALLER": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + 
"multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + 
"multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/haplotypecaller", + "reports/bcftools/haplotypecaller/test", + "reports/bcftools/haplotypecaller/test/test.haplotypecaller.filtered.bcftools_stats.txt", + 
"reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/haplotypecaller", + "reports/vcftools/haplotypecaller/test", + "reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.FILTER.summary", + "reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.TsTv.count", + "reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.TsTv.qual", + "variant_calling", + "variant_calling/haplotypecaller", + "variant_calling/haplotypecaller/test", + "variant_calling/haplotypecaller/test/test.haplotypecaller.filtered.vcf.gz", + "variant_calling/haplotypecaller/test/test.haplotypecaller.filtered.vcf.gz.tbi", + "variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz", + "variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,93a8fd4ad5c5a4fcdf9e67a07183c7cb", + "mosdepth-cumcoverage-dist-id.txt:md5,24098c80b7a75e41cd68ef1f2154b7b0", + "mosdepth_perchrom.txt:md5,93a8fd4ad5c5a4fcdf9e67a07183c7cb", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,06abce459a3e3192252b6196ae3b189e", + "samtools_alignment_plot.txt:md5,3622ecb71829cc1c581157e403048fd1", + "test.haplotypecaller.filtered.bcftools_stats.txt:md5,bfdbcc0c0513be1e223434eefee3b90b", + "test.recal.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e", + "test.recal.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59", + "test.recal.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa", + "test.recal.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4", + 
"test.haplotypecaller.filtered.FILTER.summary:md5,4e2ceea7f3ff998004691fd71192d9ee", + "test.haplotypecaller.filtered.TsTv.count:md5,b77c120ee5cc0423267200c67d60c663" + ], + "No BAM files", + "No CRAM files", + [ + "test.haplotypecaller.filtered.vcf.gz:md5,df2040bf1bee0252581824d11d5d87d1", + "test.haplotypecaller.vcf.gz:md5,9d9d103327d59d17e778b663b56136fb" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T22:54:55.763188536" + }, + "-profile test --input mapped_single_bam.csv --tools haplotypecaller --step variant_calling --wes --nucleotides_per_second 20": { + "content": [ + 16, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CNNSCOREVARIANTS": { + "gatk4": "4.5.0.0" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FILTERVARIANTTRANCHES": { + "gatk4": "4.6.1.0" + }, + "GATK4_HAPLOTYPECALLER": { + "gatk4": "4.6.1.0" + }, + "MERGE_HAPLOTYPECALLER": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + 
"multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + 
"multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + 
"multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/haplotypecaller", + "reports/bcftools/haplotypecaller/test", + "reports/bcftools/haplotypecaller/test/test.haplotypecaller.filtered.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/haplotypecaller", + "reports/vcftools/haplotypecaller/test", + "reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.FILTER.summary", + "reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.TsTv.count", + "reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.TsTv.qual", + "variant_calling", + "variant_calling/haplotypecaller", + "variant_calling/haplotypecaller/test", + "variant_calling/haplotypecaller/test/test.haplotypecaller.filtered.vcf.gz", + "variant_calling/haplotypecaller/test/test.haplotypecaller.filtered.vcf.gz.tbi", + "variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz", + "variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz.tbi" + ], + [ + 
"mosdepth-coverage-per-contig-single.txt:md5,6e1f4a26793f1912cd83cee7259635c6", + "mosdepth-cumcoverage-dist-id.txt:md5,cff5ad3df703fd30cd655ef53caf75fb", + "mosdepth_perchrom.txt:md5,6e1f4a26793f1912cd83cee7259635c6", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,06abce459a3e3192252b6196ae3b189e", + "samtools_alignment_plot.txt:md5,3622ecb71829cc1c581157e403048fd1", + "test.haplotypecaller.filtered.bcftools_stats.txt:md5,bfdbcc0c0513be1e223434eefee3b90b", + "test.recal.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e", + "test.recal.mosdepth.region.dist.txt:md5,3a2030e5e8af7bc12720c3a5592bf921", + "test.recal.mosdepth.summary.txt:md5,615c5c5019d88045a9ff5bbe6e63d270", + "test.recal.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa", + "test.recal.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4", + "test.recal.regions.bed.gz:md5,0c8215fbea7b0bf7aba9d1781575f905", + "test.recal.regions.bed.gz.csi:md5,5c00a1d457c387d6e71848a6d897e309", + "test.haplotypecaller.filtered.FILTER.summary:md5,4e2ceea7f3ff998004691fd71192d9ee", + "test.haplotypecaller.filtered.TsTv.count:md5,b77c120ee5cc0423267200c67d60c663" + ], + "No BAM files", + "No CRAM files", + [ + "test.haplotypecaller.filtered.vcf.gz:md5,df2040bf1bee0252581824d11d5d87d1", + "test.haplotypecaller.vcf.gz:md5,9d9d103327d59d17e778b663b56136fb" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T22:53:34.886097542" + } +} diff --git a/tests/variant_calling_lofreq.nf.test b/tests/variant_calling_lofreq.nf.test new file mode 100644 index 0000000000..25be3eec4a --- /dev/null +++ b/tests/variant_calling_lofreq.nf.test @@ -0,0 +1,48 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: 
"-profile test --tools lofreq tumoronly", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_tumoronly.csv", + step: "variant_calling", + tools: 'lofreq', + wes: true, + genome: null, + igenomes_ignore: true + ], + // Instable MD5SUM between conda and docker/singularity + no_vcf_md5sum: true, + ], + [ + name: "-profile test --tools lofreq --no_intervals tumoronly", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_tumoronly.csv", + step: "variant_calling", + tools: 'lofreq', + wes: true, + genome: null, + igenomes_ignore: true, + no_intervals: true + ], + // Instable MD5SUM between conda and docker/singularity + no_vcf_md5sum: true, + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_lofreq.nf.test.snap b/tests/variant_calling_lofreq.nf.test.snap new file mode 100644 index 0000000000..53574ab894 --- /dev/null +++ b/tests/variant_calling_lofreq.nf.test.snap @@ -0,0 +1,350 @@ +{ + "-profile test --tools lofreq tumoronly": { + "content": [ + 11, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "LOFREQ": { + "lofreq": "2.1.5" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + 
"TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + 
"multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/lofreq", + "reports/bcftools/lofreq/sample2", + "reports/bcftools/lofreq/sample2/sample2.lofreq.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/lofreq", + "reports/vcftools/lofreq/sample2", + 
"reports/vcftools/lofreq/sample2/sample2.lofreq.FILTER.summary", + "reports/vcftools/lofreq/sample2/sample2.lofreq.TsTv.qual", + "variant_calling", + "variant_calling/lofreq", + "variant_calling/lofreq/sample2", + "variant_calling/lofreq/sample2/sample2.lofreq.vcf.gz", + "variant_calling/lofreq/sample2/sample2.lofreq.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,1f24f2f40467234c410a8bda544a8aae", + "mosdepth-cumcoverage-dist-id.txt:md5,1036ea76acae803f591fd99838a8eded", + "mosdepth_perchrom.txt:md5,1f24f2f40467234c410a8bda544a8aae", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", + "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", + "sample2.lofreq.bcftools_stats.txt:md5,a8a850fdd11644fa4b770971dfe37194", + "sample2.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample2.recal.mosdepth.region.dist.txt:md5,39005ffaac22871ffaaf19656fe69c5b", + "sample2.recal.mosdepth.summary.txt:md5,68d4b98f17361fddf73052ead34fa370", + "sample2.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample2.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample2.recal.regions.bed.gz:md5,b7561bc56a955f7db0f11e67e2ec0386", + "sample2.recal.regions.bed.gz.csi:md5,393c2749068304d8545b501b9d4658e4", + "sample2.lofreq.FILTER.summary:md5,8dd8a0c91d5c4a260b462e04f615e502" + ], + "No BAM files", + "No CRAM files", + [ + [ + "sample2.lofreq.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=0, variantCount=69, phased=true, phasedAutodetect=true]" + ] + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T22:59:00.143144189" + }, + "-profile test --tools lofreq --no_intervals tumoronly": { + "content": [ + 9, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "LOFREQ": { + "lofreq": "2.1.5" + }, + 
"MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + 
"multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/lofreq", + "reports/bcftools/lofreq/sample2", + "reports/bcftools/lofreq/sample2/sample2.lofreq.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/lofreq", + "reports/vcftools/lofreq/sample2", + "reports/vcftools/lofreq/sample2/sample2.lofreq.FILTER.summary", + "reports/vcftools/lofreq/sample2/sample2.lofreq.TsTv.qual", + "variant_calling", + 
"variant_calling/lofreq", + "variant_calling/lofreq/sample2", + "variant_calling/lofreq/sample2/sample2.lofreq.vcf.gz", + "variant_calling/lofreq/sample2/sample2.lofreq.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,a0266cd06dd6f7f5463f09e3cb8af71d", + "mosdepth-cumcoverage-dist-id.txt:md5,88b94dd2dcc423983da65125ece7651e", + "mosdepth_perchrom.txt:md5,a0266cd06dd6f7f5463f09e3cb8af71d", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", + "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", + "sample2.lofreq.bcftools_stats.txt:md5,dd602205b6d368eb0e21d2a94c36e0de", + "sample2.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample2.recal.mosdepth.summary.txt:md5,0a7300e56eda6fba7c7564f00aa000f0", + "sample2.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample2.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample2.lofreq.FILTER.summary:md5,72beda1b57da053eb352204828605a40" + ], + "No BAM files", + "No CRAM files", + [ + [ + "sample2.lofreq.vcf.gz", + "VcfFile [chromosomes=[chr21], sampleCount=0, variantCount=70, phased=true, phasedAutodetect=true]" + ] + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:00:34.897541163" + } +} diff --git a/tests/variant_calling_manta.nf.test b/tests/variant_calling_manta.nf.test new file mode 100644 index 0000000000..ef289dc528 --- /dev/null +++ b/tests/variant_calling_manta.nf.test @@ -0,0 +1,96 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools manta --only_paired_variant_calling", + params: [ + fasta: modules_testdata_base_path + 
'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated.csv", + step: "variant_calling", + tools: 'manta', + only_paired_variant_calling: true + ] + ], + [ + name: "-profile test --tools manta germline", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_germline.csv", + step: "variant_calling", + tools: 'manta' + ] + ], + [ + name: "-profile test --tools manta --no_intervals germline", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_germline.csv", + no_intervals: true, + step: "variant_calling", + tools: 'manta' + ] + ], + [ + name: "-profile test --tools manta somatic", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + step: "variant_calling", + tools: 'manta' + ] + ], + [ + name: "-profile test --tools manta 
--no_intervals somatic", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + no_intervals: true, + step: "variant_calling", + tools: 'manta' + ] + ], + [ + name: "-profile test --tools manta tumoronly", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_tumoronly.csv", + step: "variant_calling", + tools: 'manta' + ] + ], + [ + name: "-profile test --tools manta --no_intervals tumoronly", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_tumoronly.csv", + no_intervals: true, + step: "variant_calling", + tools: 'manta' + ] + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_manta.nf.test.snap b/tests/variant_calling_manta.nf.test.snap new file mode 100644 index 0000000000..41efc76ca9 --- /dev/null +++ b/tests/variant_calling_manta.nf.test.snap @@ -0,0 +1,1129 @@ +{ + "-profile test --tools manta germline": { + "content": [ + 11, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": 
"5.3.0" + }, + "MANTA_GERMLINE": { + "manta": "1.6.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + 
"multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/manta", + "reports/bcftools/manta/sample1", + "reports/bcftools/manta/sample1/sample1.manta.diploid_sv.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample1", + "reports/mosdepth/sample1/sample1.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.summary.txt", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample1", + "reports/samtools/sample1/sample1.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/manta", + "reports/vcftools/manta/sample1", + "reports/vcftools/manta/sample1/sample1.manta.diploid_sv.FILTER.summary", + "reports/vcftools/manta/sample1/sample1.manta.diploid_sv.TsTv.count", + 
"reports/vcftools/manta/sample1/sample1.manta.diploid_sv.TsTv.qual", + "variant_calling", + "variant_calling/manta", + "variant_calling/manta/sample1", + "variant_calling/manta/sample1/sample1.manta.diploid_sv.vcf.gz", + "variant_calling/manta/sample1/sample1.manta.diploid_sv.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,49fd828d21d79a5c9430cdb30d7f0126", + "mosdepth-cumcoverage-dist-id.txt:md5,edd8dfc8023e629e476e01c3a1448d30", + "mosdepth_perchrom.txt:md5,49fd828d21d79a5c9430cdb30d7f0126", + "multiqc_citations.txt:md5,e49571d90d78a0d1bc919b82d7dc48c6", + "samtools-stats-dp.txt:md5,41d36c1a8413d565788a1a364b467606", + "samtools_alignment_plot.txt:md5,35f8fd2a557568b2237193f46afbab5c", + "sample1.manta.diploid_sv.bcftools_stats.txt:md5,636109db283cbee4539786928c811893", + "sample1.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample1.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample1.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample1.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample1.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample1.manta.diploid_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample1.manta.diploid_sv.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a" + ], + "No BAM files", + "No CRAM files", + [ + "sample1.manta.diploid_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:03:03.670841382" + }, + "-profile test --tools manta --no_intervals tumoronly": { + "content": [ + 9, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "MANTA_TUMORONLY": { + "manta": "1.6.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": 
"0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + 
"multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/manta", + "reports/bcftools/manta/sample2", + "reports/bcftools/manta/sample2/sample2.manta.tumor_sv.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/manta", + "reports/vcftools/manta/sample2", + "reports/vcftools/manta/sample2/sample2.manta.tumor_sv.FILTER.summary", + "reports/vcftools/manta/sample2/sample2.manta.tumor_sv.TsTv.count", + "reports/vcftools/manta/sample2/sample2.manta.tumor_sv.TsTv.qual", + "variant_calling", + "variant_calling/manta", + "variant_calling/manta/sample2", + "variant_calling/manta/sample2/sample2.manta.tumor_sv.vcf.gz", + "variant_calling/manta/sample2/sample2.manta.tumor_sv.vcf.gz.tbi" + ], + [ + 
"mosdepth-coverage-per-contig-single.txt:md5,ff38f1e35ce12244c751921db673b23c", + "mosdepth-cumcoverage-dist-id.txt:md5,8398d1127a10d0c002831ddedfb9713b", + "mosdepth_perchrom.txt:md5,ff38f1e35ce12244c751921db673b23c", + "multiqc_citations.txt:md5,e49571d90d78a0d1bc919b82d7dc48c6", + "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", + "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", + "sample2.manta.tumor_sv.bcftools_stats.txt:md5,9fbe26c75869000b526b59b454f76f6a", + "sample2.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample2.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample2.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample2.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample2.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample2.manta.tumor_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample2.manta.tumor_sv.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a" + ], + "No BAM files", + "No CRAM files", + [ + "sample2.manta.tumor_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:08:29.995088195" + }, + "-profile test --tools manta --no_intervals somatic": { + "content": [ + 20, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "MANTA_GERMLINE": { + "manta": "1.6.0" + }, + "MANTA_SOMATIC": { + "manta": "1.6.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + 
"multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + 
"multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/manta", + "reports/bcftools/manta/sample3", + "reports/bcftools/manta/sample3/sample3.manta.diploid_sv.bcftools_stats.txt", + "reports/bcftools/manta/sample4_vs_sample3", + "reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt", + "reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/manta", + "reports/vcftools/manta/sample3", + 
"reports/vcftools/manta/sample3/sample3.manta.diploid_sv.FILTER.summary", + "reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.count", + "reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.qual", + "reports/vcftools/manta/sample4_vs_sample3", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.FILTER.summary", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.count", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.qual", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.FILTER.summary", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.count", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.qual", + "variant_calling", + "variant_calling/manta", + "variant_calling/manta/sample3", + "variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz", + "variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz.tbi", + "variant_calling/manta/sample4_vs_sample3", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz.tbi", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,336d786b273c4d4e714d51c44207ff56", + "mosdepth-cumcoverage-dist-id.txt:md5,0cc87c596a0fc30ebb676c4587e986de", + "mosdepth_perchrom.txt:md5,336d786b273c4d4e714d51c44207ff56", + "multiqc_citations.txt:md5,e49571d90d78a0d1bc919b82d7dc48c6", + "samtools-stats-dp.txt:md5,e0a8d8867064083908a8ca1ea782d7ac", + "samtools_alignment_plot.txt:md5,f4b1a7cef760291172144a8614b4a1cd", + "sample3.manta.diploid_sv.bcftools_stats.txt:md5,36a838390faba81e3eabf5ac8a093a4a", + 
"sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt:md5,f00cf810d34ef7e5c7980f7039bb4446", + "sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt:md5,7af2ea2e84154ddf2a483b1bd1f0646c", + "sample3.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample3.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample3.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample3.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample3.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample4.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample4.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample4.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample4.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample4.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample3.manta.diploid_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample3.manta.diploid_sv.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + "sample4_vs_sample3.manta.diploid_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample4_vs_sample3.manta.diploid_sv.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + "sample4_vs_sample3.manta.somatic_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample4_vs_sample3.manta.somatic_sv.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f" + ], + "No BAM files", + "No CRAM files", + [ + "sample3.manta.diploid_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample4_vs_sample3.manta.diploid_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample4_vs_sample3.manta.somatic_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:06:20.662942446" + }, + "-profile test --tools manta tumoronly": { + "content": [ + 11, + { + 
"BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MANTA_TUMORONLY": { + "manta": "1.6.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + 
"multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/manta", + "reports/bcftools/manta/sample2", + "reports/bcftools/manta/sample2/sample2.manta.tumor_sv.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/manta", + "reports/vcftools/manta/sample2", + "reports/vcftools/manta/sample2/sample2.manta.tumor_sv.FILTER.summary", + "reports/vcftools/manta/sample2/sample2.manta.tumor_sv.TsTv.count", + 
"reports/vcftools/manta/sample2/sample2.manta.tumor_sv.TsTv.qual", + "variant_calling", + "variant_calling/manta", + "variant_calling/manta/sample2", + "variant_calling/manta/sample2/sample2.manta.tumor_sv.vcf.gz", + "variant_calling/manta/sample2/sample2.manta.tumor_sv.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,ff38f1e35ce12244c751921db673b23c", + "mosdepth-cumcoverage-dist-id.txt:md5,8398d1127a10d0c002831ddedfb9713b", + "mosdepth_perchrom.txt:md5,ff38f1e35ce12244c751921db673b23c", + "multiqc_citations.txt:md5,e49571d90d78a0d1bc919b82d7dc48c6", + "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", + "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", + "sample2.manta.tumor_sv.bcftools_stats.txt:md5,9fbe26c75869000b526b59b454f76f6a", + "sample2.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample2.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample2.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample2.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample2.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample2.manta.tumor_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample2.manta.tumor_sv.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a" + ], + "No BAM files", + "No CRAM files", + [ + "sample2.manta.tumor_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:07:26.563804504" + }, + "-profile test --tools manta --only_paired_variant_calling": { + "content": [ + 31, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MANTA_GERMLINE": { + "manta": "1.6.0" + }, + "MANTA_SOMATIC": { + "manta": "1.6.0" + }, + "MANTA_TUMORONLY": { + "manta": "1.6.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 
1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + 
"multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/manta", + "reports/bcftools/manta/sample1", + "reports/bcftools/manta/sample1/sample1.manta.diploid_sv.bcftools_stats.txt", + "reports/bcftools/manta/sample2", + "reports/bcftools/manta/sample2/sample2.manta.tumor_sv.bcftools_stats.txt", + "reports/bcftools/manta/sample4_vs_sample3", + "reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt", + "reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample1", + "reports/mosdepth/sample1/sample1.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.summary.txt", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz.csi", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + 
"reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample1", + "reports/samtools/sample1/sample1.recal.cram.stats", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/manta", + "reports/vcftools/manta/sample1", + "reports/vcftools/manta/sample1/sample1.manta.diploid_sv.FILTER.summary", + "reports/vcftools/manta/sample1/sample1.manta.diploid_sv.TsTv.count", + "reports/vcftools/manta/sample1/sample1.manta.diploid_sv.TsTv.qual", + "reports/vcftools/manta/sample2", + "reports/vcftools/manta/sample2/sample2.manta.tumor_sv.FILTER.summary", + "reports/vcftools/manta/sample2/sample2.manta.tumor_sv.TsTv.count", + "reports/vcftools/manta/sample2/sample2.manta.tumor_sv.TsTv.qual", + "reports/vcftools/manta/sample4_vs_sample3", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.FILTER.summary", + 
"reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.count", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.qual", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.FILTER.summary", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.count", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.qual", + "variant_calling", + "variant_calling/manta", + "variant_calling/manta/sample1", + "variant_calling/manta/sample1/sample1.manta.diploid_sv.vcf.gz", + "variant_calling/manta/sample1/sample1.manta.diploid_sv.vcf.gz.tbi", + "variant_calling/manta/sample2", + "variant_calling/manta/sample2/sample2.manta.tumor_sv.vcf.gz", + "variant_calling/manta/sample2/sample2.manta.tumor_sv.vcf.gz.tbi", + "variant_calling/manta/sample4_vs_sample3", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz.tbi", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,73ef9a077df1887f9021a581fbf207bc", + "mosdepth-cumcoverage-dist-id.txt:md5,ad0637d55d7025330f2f6cb7f9680e64", + "mosdepth_perchrom.txt:md5,73ef9a077df1887f9021a581fbf207bc", + "multiqc_citations.txt:md5,e49571d90d78a0d1bc919b82d7dc48c6", + "samtools-stats-dp.txt:md5,b446a47b182d93a9e7e74f5f7c8d41c2", + "samtools_alignment_plot.txt:md5,7138a2d29f515993e1df8d745e27b757", + "sample1.manta.diploid_sv.bcftools_stats.txt:md5,636109db283cbee4539786928c811893", + "sample2.manta.tumor_sv.bcftools_stats.txt:md5,9fbe26c75869000b526b59b454f76f6a", + "sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt:md5,f00cf810d34ef7e5c7980f7039bb4446", + 
"sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt:md5,7af2ea2e84154ddf2a483b1bd1f0646c", + "sample1.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample1.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample1.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample1.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample1.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample2.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample2.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample2.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample2.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample2.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample3.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample3.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample3.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample3.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample3.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample4.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample4.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample4.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample4.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample4.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample1.manta.diploid_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample1.manta.diploid_sv.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + "sample2.manta.tumor_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample2.manta.tumor_sv.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + 
"sample4_vs_sample3.manta.diploid_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample4_vs_sample3.manta.diploid_sv.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + "sample4_vs_sample3.manta.somatic_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample4_vs_sample3.manta.somatic_sv.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f" + ], + "No BAM files", + "No CRAM files", + [ + "sample1.manta.diploid_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample2.manta.tumor_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample4_vs_sample3.manta.diploid_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample4_vs_sample3.manta.somatic_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:01:59.085899814" + }, + "-profile test --tools manta somatic": { + "content": [ + 22, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MANTA_GERMLINE": { + "manta": "1.6.0" + }, + "MANTA_SOMATIC": { + "manta": "1.6.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + 
"multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + 
"multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/manta", + "reports/bcftools/manta/sample3", + "reports/bcftools/manta/sample3/sample3.manta.diploid_sv.bcftools_stats.txt", + "reports/bcftools/manta/sample4_vs_sample3", + "reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt", + "reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/manta", + "reports/vcftools/manta/sample3", + "reports/vcftools/manta/sample3/sample3.manta.diploid_sv.FILTER.summary", + "reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.count", + "reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.qual", + "reports/vcftools/manta/sample4_vs_sample3", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.FILTER.summary", + 
"reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.count", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.qual", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.FILTER.summary", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.count", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.qual", + "variant_calling", + "variant_calling/manta", + "variant_calling/manta/sample3", + "variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz", + "variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz.tbi", + "variant_calling/manta/sample4_vs_sample3", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz.tbi", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,336d786b273c4d4e714d51c44207ff56", + "mosdepth-cumcoverage-dist-id.txt:md5,0cc87c596a0fc30ebb676c4587e986de", + "mosdepth_perchrom.txt:md5,336d786b273c4d4e714d51c44207ff56", + "multiqc_citations.txt:md5,e49571d90d78a0d1bc919b82d7dc48c6", + "samtools-stats-dp.txt:md5,e0a8d8867064083908a8ca1ea782d7ac", + "samtools_alignment_plot.txt:md5,f4b1a7cef760291172144a8614b4a1cd", + "sample3.manta.diploid_sv.bcftools_stats.txt:md5,36a838390faba81e3eabf5ac8a093a4a", + "sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt:md5,f00cf810d34ef7e5c7980f7039bb4446", + "sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt:md5,7af2ea2e84154ddf2a483b1bd1f0646c", + "sample3.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample3.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + 
"sample3.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample3.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample3.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample4.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample4.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample4.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample4.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample4.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample3.manta.diploid_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample3.manta.diploid_sv.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + "sample4_vs_sample3.manta.diploid_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample4_vs_sample3.manta.diploid_sv.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + "sample4_vs_sample3.manta.somatic_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample4_vs_sample3.manta.somatic_sv.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f" + ], + "No BAM files", + "No CRAM files", + [ + "sample3.manta.diploid_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample4_vs_sample3.manta.diploid_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample4_vs_sample3.manta.somatic_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:05:13.763910639" + }, + "-profile test --tools manta --no_intervals germline": { + "content": [ + 9, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "MANTA_GERMLINE": { + "manta": "1.6.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + 
"csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + 
"multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/manta", + "reports/bcftools/manta/sample1", + "reports/bcftools/manta/sample1/sample1.manta.diploid_sv.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample1", + "reports/mosdepth/sample1/sample1.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.summary.txt", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample1", + "reports/samtools/sample1/sample1.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/manta", + "reports/vcftools/manta/sample1", + "reports/vcftools/manta/sample1/sample1.manta.diploid_sv.FILTER.summary", + "reports/vcftools/manta/sample1/sample1.manta.diploid_sv.TsTv.count", + "reports/vcftools/manta/sample1/sample1.manta.diploid_sv.TsTv.qual", + "variant_calling", + "variant_calling/manta", + "variant_calling/manta/sample1", + "variant_calling/manta/sample1/sample1.manta.diploid_sv.vcf.gz", + "variant_calling/manta/sample1/sample1.manta.diploid_sv.vcf.gz.tbi" + ], + [ + 
"mosdepth-coverage-per-contig-single.txt:md5,49fd828d21d79a5c9430cdb30d7f0126", + "mosdepth-cumcoverage-dist-id.txt:md5,edd8dfc8023e629e476e01c3a1448d30", + "mosdepth_perchrom.txt:md5,49fd828d21d79a5c9430cdb30d7f0126", + "multiqc_citations.txt:md5,e49571d90d78a0d1bc919b82d7dc48c6", + "samtools-stats-dp.txt:md5,41d36c1a8413d565788a1a364b467606", + "samtools_alignment_plot.txt:md5,35f8fd2a557568b2237193f46afbab5c", + "sample1.manta.diploid_sv.bcftools_stats.txt:md5,636109db283cbee4539786928c811893", + "sample1.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample1.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample1.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample1.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample1.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample1.manta.diploid_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample1.manta.diploid_sv.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a" + ], + "No BAM files", + "No CRAM files", + [ + "sample1.manta.diploid_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:04:06.55406911" + } +} diff --git a/tests/variant_calling_mpileup.nf.test b/tests/variant_calling_mpileup.nf.test new file mode 100644 index 0000000000..ce9de114c6 --- /dev/null +++ b/tests/variant_calling_mpileup.nf.test @@ -0,0 +1,59 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools mpileup --input recalibrated_germline.csv", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path 
+ 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_germline.csv", + step: "variant_calling", + tools: 'mpileup' + ] + ], + [ + name: "-profile test --tools mpileup --input recalibrated_germline.csv --no_intervals", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: "${projectDir}/tests/csv/3.0/recalibrated_germline.csv", + step: "variant_calling", + tools: 'mpileup', + no_intervals: true + ] + ], + [ + name: "-profile test --tools mpileup --input recalibrated_tumoronly.csv", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_tumoronly.csv", + step: "variant_calling", + tools: 'mpileup' + ] + ], + [ + name: "-profile test --tools mpileup --input recalibrated_tumoronly.csv --no_intervals", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: "${projectDir}/tests/csv/3.0/recalibrated_tumoronly.csv", + step: "variant_calling", + tools: 'mpileup', + no_intervals: true + ] + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_mpileup.nf.test.snap b/tests/variant_calling_mpileup.nf.test.snap new file mode 100644 index 0000000000..aa59a6c33b --- /dev/null +++ 
b/tests/variant_calling_mpileup.nf.test.snap @@ -0,0 +1,702 @@ +{ + "-profile test --tools mpileup --input recalibrated_tumoronly.csv": { + "content": [ + 11, + { + "BCFTOOLS_MPILEUP": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + 
"multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + 
"multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/bcftools", + "reports/bcftools/bcftools/sample2", + "reports/bcftools/bcftools/sample2/sample2.bcftools.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample2", + 
"reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/bcftools", + "reports/vcftools/bcftools/sample2", + "reports/vcftools/bcftools/sample2/sample2.bcftools.FILTER.summary", + "reports/vcftools/bcftools/sample2/sample2.bcftools.TsTv.count", + "reports/vcftools/bcftools/sample2/sample2.bcftools.TsTv.qual", + "variant_calling", + "variant_calling/bcftools", + "variant_calling/bcftools/sample2", + "variant_calling/bcftools/sample2/sample2.bcftools.vcf.gz", + "variant_calling/bcftools/sample2/sample2.bcftools.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,ff38f1e35ce12244c751921db673b23c", + "mosdepth-cumcoverage-dist-id.txt:md5,8398d1127a10d0c002831ddedfb9713b", + "mosdepth_perchrom.txt:md5,ff38f1e35ce12244c751921db673b23c", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", + "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", + "sample2.bcftools.bcftools_stats.txt:md5,3299f97352e32c873c95e43922c79147", + "sample2.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample2.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample2.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample2.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample2.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample2.bcftools.FILTER.summary:md5,8766995f3e4119ef30dfdaa9fb3752ce", + "sample2.bcftools.TsTv.count:md5,01df95fcb4df593f7e1b214d90ebdb59" + ], + "No BAM files", 
+ "No CRAM files", + [ + "sample2.bcftools.vcf.gz:md5,439c858d3b61aa738252353e2fd04225" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:12:38.089637491" + }, + "-profile test --tools mpileup --input recalibrated_germline.csv --no_intervals": { + "content": [ + 9, + { + "BCFTOOLS_MPILEUP": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + 
"multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + 
"multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/bcftools", + "reports/bcftools/bcftools/sample1", + 
"reports/bcftools/bcftools/sample1/sample1.bcftools.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample1", + "reports/mosdepth/sample1/sample1.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.summary.txt", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample1", + "reports/samtools/sample1/sample1.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/bcftools", + "reports/vcftools/bcftools/sample1", + "reports/vcftools/bcftools/sample1/sample1.bcftools.FILTER.summary", + "reports/vcftools/bcftools/sample1/sample1.bcftools.TsTv.count", + "reports/vcftools/bcftools/sample1/sample1.bcftools.TsTv.qual", + "variant_calling", + "variant_calling/bcftools", + "variant_calling/bcftools/sample1", + "variant_calling/bcftools/sample1/sample1.bcftools.vcf.gz", + "variant_calling/bcftools/sample1/sample1.bcftools.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,49fd828d21d79a5c9430cdb30d7f0126", + "mosdepth-cumcoverage-dist-id.txt:md5,edd8dfc8023e629e476e01c3a1448d30", + "mosdepth_perchrom.txt:md5,49fd828d21d79a5c9430cdb30d7f0126", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,41d36c1a8413d565788a1a364b467606", + "samtools_alignment_plot.txt:md5,35f8fd2a557568b2237193f46afbab5c", + "sample1.bcftools.bcftools_stats.txt:md5,0659e2f55ea631b8757dd04facc286a1", + "sample1.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample1.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample1.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample1.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample1.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + 
"sample1.bcftools.FILTER.summary:md5,83a10512eb9d035f409a84db7e620c28", + "sample1.bcftools.TsTv.count:md5,bfa998e75cbcb3da66f823cf39ef1e48" + ], + "No BAM files", + "No CRAM files", + [ + "sample1.bcftools.vcf.gz:md5,83ab8ae7c467336e1ee7f672bbb037ad" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:11:16.876484642" + }, + "-profile test --tools mpileup --input recalibrated_tumoronly.csv --no_intervals": { + "content": [ + 9, + { + "BCFTOOLS_MPILEUP": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + 
"multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + 
"multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + 
"reports", + "reports/bcftools", + "reports/bcftools/bcftools", + "reports/bcftools/bcftools/sample2", + "reports/bcftools/bcftools/sample2/sample2.bcftools.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/bcftools", + "reports/vcftools/bcftools/sample2", + "reports/vcftools/bcftools/sample2/sample2.bcftools.FILTER.summary", + "reports/vcftools/bcftools/sample2/sample2.bcftools.TsTv.count", + "reports/vcftools/bcftools/sample2/sample2.bcftools.TsTv.qual", + "variant_calling", + "variant_calling/bcftools", + "variant_calling/bcftools/sample2", + "variant_calling/bcftools/sample2/sample2.bcftools.vcf.gz", + "variant_calling/bcftools/sample2/sample2.bcftools.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,ff38f1e35ce12244c751921db673b23c", + "mosdepth-cumcoverage-dist-id.txt:md5,8398d1127a10d0c002831ddedfb9713b", + "mosdepth_perchrom.txt:md5,ff38f1e35ce12244c751921db673b23c", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", + "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", + "sample2.bcftools.bcftools_stats.txt:md5,cc6063baaf7443b12b3fa1c972e804c8", + "sample2.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample2.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample2.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample2.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + 
"sample2.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample2.bcftools.FILTER.summary:md5,f295c70f174e7705fc2bac607aedbfda", + "sample2.bcftools.TsTv.count:md5,e5d20f81fc97f7ee97fb6cb6bd851047" + ], + "No BAM files", + "No CRAM files", + [ + "sample2.bcftools.vcf.gz:md5,bb006620fd9b7eaf5104d64f34290901" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:14:01.579156158" + }, + "-profile test --tools mpileup --input recalibrated_germline.csv": { + "content": [ + 11, + { + "BCFTOOLS_MPILEUP": { + "bcftools": 1.21 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + 
"multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + 
"multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + 
"multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/bcftools", + "reports/bcftools/bcftools/sample1", + "reports/bcftools/bcftools/sample1/sample1.bcftools.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample1", + "reports/mosdepth/sample1/sample1.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.summary.txt", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample1", + "reports/samtools/sample1/sample1.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/bcftools", + "reports/vcftools/bcftools/sample1", + "reports/vcftools/bcftools/sample1/sample1.bcftools.FILTER.summary", + "reports/vcftools/bcftools/sample1/sample1.bcftools.TsTv.count", + "reports/vcftools/bcftools/sample1/sample1.bcftools.TsTv.qual", + "variant_calling", + "variant_calling/bcftools", + "variant_calling/bcftools/sample1", + "variant_calling/bcftools/sample1/sample1.bcftools.vcf.gz", + "variant_calling/bcftools/sample1/sample1.bcftools.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,49fd828d21d79a5c9430cdb30d7f0126", + "mosdepth-cumcoverage-dist-id.txt:md5,edd8dfc8023e629e476e01c3a1448d30", + "mosdepth_perchrom.txt:md5,49fd828d21d79a5c9430cdb30d7f0126", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,41d36c1a8413d565788a1a364b467606", + "samtools_alignment_plot.txt:md5,35f8fd2a557568b2237193f46afbab5c", + "sample1.bcftools.bcftools_stats.txt:md5,a4865cc7e9dfbea42d098f4bbbc7459d", + "sample1.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample1.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + 
"sample1.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample1.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample1.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample1.bcftools.FILTER.summary:md5,9b62595b026decf12e9198d531e4307a", + "sample1.bcftools.TsTv.count:md5,6c937125d7bac4c491bea50f18cba43a" + ], + "No BAM files", + "No CRAM files", + [ + "sample1.bcftools.vcf.gz:md5,c55cf36bc05b6a7b4a98bb3ba925fc0a" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:09:53.430122786" + } +} diff --git a/tests/variant_calling_msisensor2.nf.test b/tests/variant_calling_msisensor2.nf.test new file mode 100644 index 0000000000..767eb0bcd4 --- /dev/null +++ b/tests/variant_calling_msisensor2.nf.test @@ -0,0 +1,72 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + + def test_scenario = [ + [ + name: "-profile test --tools msisensor2 tumor_only", + params: [ + genome: null, + igenomes_ignore: true, + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: "${projectDir}/tests/csv/3.0/bam_tumoronly_msisensor2.csv", + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + msisensor2_models: modules_testdata_base_path + 'genomics/homo_sapiens/msisensor2/models_hg19_17sites.tar.gz', + step: "variant_calling", + tools: 'msisensor2', + wes: 
true + ] + ], + [ + name: "-profile test --tools msisensor2 --build_only_index --input false", + params: [ + genome: null, + igenomes_ignore: true, + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: false, + build_only_index: true, + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + msisensor2_models: modules_testdata_base_path + 'genomics/homo_sapiens/msisensor2/models_hg19_17sites.tar.gz', + no_intervals: true, + step: "variant_calling", + tools: 'msisensor2', + wes: true + ] + ], + [ + name: "-profile test --tools msisensor2 --build_only_index --input false -stub", + params: [ + genome: null, + igenomes_ignore: true, + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: false, + build_only_index: true, + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + msisensor2_models: modules_testdata_base_path + 'genomics/homo_sapiens/msisensor2/models_hg19_17sites.tar.gz', + no_intervals: true, + step: "variant_calling", + tools: 'msisensor2', + wes: true + ], + stub: true + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} 
diff --git a/tests/variant_calling_msisensor2.nf.test.snap b/tests/variant_calling_msisensor2.nf.test.snap new file mode 100644 index 0000000000..1bb4b030e4 --- /dev/null +++ b/tests/variant_calling_msisensor2.nf.test.snap @@ -0,0 +1,279 @@ +{ + "-profile test --tools msisensor2 --build_only_index --input false": { + "content": [ + 4, + { + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "UNTAR_MSISENSOR2_MODELS": { + "untar": 1.34 + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reference/dict/genome.dict", + "reference/intervals", + "reference/intervals/no_intervals.bed.gz", + "reference/msisensor2", + "reference/msisensor2/models", + "reference/msisensor2/models/016a16e12aca2bdba3713a3be76f72cd", + "reference/msisensor2/models/02d42c2bda19aac304d6e86390c7f328", + "reference/msisensor2/models/1030c0aa35ca5c263daeae866ad18632", + "reference/msisensor2/models/15c3f5ec1c020d8f44283e40a2d9b6bb", + "reference/msisensor2/models/15d6012f9a234b7adbbeecec524aea7d", + "reference/msisensor2/models/2cf9a58f57e78b88acd86d792fe6a7b3", + "reference/msisensor2/models/3ba04410c7ccbfc33e8b1b11d8132ae9", + "reference/msisensor2/models/4431c9dc08be932c460a9e67192e7c57", + "reference/msisensor2/models/4f5fa7bed97b48093375222d242fc982", + "reference/msisensor2/models/657a38415ac5a5d36609d9c180170b7a", + 
"reference/msisensor2/models/71e6c0d59ea09d2a7acc566560841e34", + "reference/msisensor2/models/8144b15900bba7086e86b31a0e1f8cfd", + "reference/msisensor2/models/9bf6f7a544f369c3262a3a6f72cfdd7b", + "reference/msisensor2/models/b8a36f2274b33cb0ed932e85cd1ddd5a", + "reference/msisensor2/models/c08f164ded323a8c2606c408c555d73d", + "reference/msisensor2/models/ceaa36ddbb76dc6eb6199ed946945788", + "reference/msisensor2/models/e05d5da7208a924762311eddc4ec96c0", + "reference/msisensor2/models/f8a20acf51ccb2b0ce6af42f24a8b5ef" + ], + [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "genome.dict:md5,de4ff13287d28c1679a11ae989a3a980", + "no_intervals.bed.gz:md5,f3dac01ea66b95fe477446fde2d31489", + "016a16e12aca2bdba3713a3be76f72cd:md5,26ddcb7b64f43835f254a13b29adf3fc", + "02d42c2bda19aac304d6e86390c7f328:md5,15ab43fe84b8b7169979d62e267e2b5f", + "1030c0aa35ca5c263daeae866ad18632:md5,41e6de8e7e35f66b691072d9839e128a", + "15c3f5ec1c020d8f44283e40a2d9b6bb:md5,7fe77d6cd3e0714d5b4390e37df7e7b9", + "15d6012f9a234b7adbbeecec524aea7d:md5,9b11cce514fc209f54501e042a7aad8e", + "2cf9a58f57e78b88acd86d792fe6a7b3:md5,9d894caf8575df7a218d37f249e5d09f", + "3ba04410c7ccbfc33e8b1b11d8132ae9:md5,71083a4e40f7b9e3e1ceaad6f58755f7", + "4431c9dc08be932c460a9e67192e7c57:md5,f32023840c68cb430c79f4df7db8d36e", + "4f5fa7bed97b48093375222d242fc982:md5,412fbe16dfdd680e5edb2d06c2aee0b6", + "657a38415ac5a5d36609d9c180170b7a:md5,3aa2c66007f669e888e061649e92f0a0", + "71e6c0d59ea09d2a7acc566560841e34:md5,994c5dfd8179ba3a230f84d6938eda8e", + "8144b15900bba7086e86b31a0e1f8cfd:md5,cee5db3ea768ca6311048b9e2aa30ba9", + "9bf6f7a544f369c3262a3a6f72cfdd7b:md5,0dee4e2124f75a2848f0864aa59647a2", + "b8a36f2274b33cb0ed932e85cd1ddd5a:md5,92f84a4b951c4763bde72bf4f4ea875c", + "c08f164ded323a8c2606c408c555d73d:md5,a9f42a43cb03a9ff37c238f13a75b614", + "ceaa36ddbb76dc6eb6199ed946945788:md5,b4c563c94f59d7cb2c5c26d04d441a01", + "e05d5da7208a924762311eddc4ec96c0:md5,0dd92cb27616630b4882bd71e4a85d9e", + 
"f8a20acf51ccb2b0ce6af42f24a8b5ef:md5,2b3ede1d6fc4bc83ab2052f3eaa2618c" + ], + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-10-13T12:34:18.918526" + }, + "-profile test --tools msisensor2 --build_only_index --input false -stub": { + "content": [ + 4, + { + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "UNTAR_MSISENSOR2_MODELS": { + "untar": 1.34 + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/.stub", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reference/dict/genome.dict", + "reference/intervals", + "reference/intervals/no_intervals.bed.gz", + "reference/msisensor2", + "reference/msisensor2/models", + "reference/msisensor2/models/016a16e12aca2bdba3713a3be76f72cd", + "reference/msisensor2/models/02d42c2bda19aac304d6e86390c7f328", + "reference/msisensor2/models/1030c0aa35ca5c263daeae866ad18632", + "reference/msisensor2/models/15c3f5ec1c020d8f44283e40a2d9b6bb", + "reference/msisensor2/models/15d6012f9a234b7adbbeecec524aea7d", + "reference/msisensor2/models/2cf9a58f57e78b88acd86d792fe6a7b3", + "reference/msisensor2/models/3ba04410c7ccbfc33e8b1b11d8132ae9", + "reference/msisensor2/models/4431c9dc08be932c460a9e67192e7c57", + "reference/msisensor2/models/4f5fa7bed97b48093375222d242fc982", + "reference/msisensor2/models/657a38415ac5a5d36609d9c180170b7a", + "reference/msisensor2/models/71e6c0d59ea09d2a7acc566560841e34", + "reference/msisensor2/models/8144b15900bba7086e86b31a0e1f8cfd", + "reference/msisensor2/models/9bf6f7a544f369c3262a3a6f72cfdd7b", + "reference/msisensor2/models/b8a36f2274b33cb0ed932e85cd1ddd5a", + 
"reference/msisensor2/models/c08f164ded323a8c2606c408c555d73d", + "reference/msisensor2/models/ceaa36ddbb76dc6eb6199ed946945788", + "reference/msisensor2/models/e05d5da7208a924762311eddc4ec96c0", + "reference/msisensor2/models/f8a20acf51ccb2b0ce6af42f24a8b5ef" + ], + [ + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T09:59:14.877299469" + }, + "-profile test --tools msisensor2 tumor_only": { + "content": [ + 9, + { + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "MSISENSOR2_MSI": { + "msisensor2": 0.1 + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "UNTAR_MSISENSOR2_MODELS": { + "untar": 1.34 + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reference/msisensor2", + "reports", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.per-base.bed.gz", + "reports/mosdepth/test/test.recal.per-base.bed.gz.csi", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "variant_calling", + "variant_calling/msisensor2", + "variant_calling/msisensor2/test", + "variant_calling/msisensor2/test/test", + "variant_calling/msisensor2/test/test_dis", + "variant_calling/msisensor2/test/test_somatic" + ], + [ + 
"mosdepth-coverage-per-contig-single.txt:md5,6a6826792618fb590f812e8a6665c86f", + "mosdepth_perchrom.txt:md5,6a6826792618fb590f812e8a6665c86f", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "samtools-stats-dp.txt:md5,981afa05e418de2b371c8d568eccf6de", + "samtools_alignment_plot.txt:md5,f16cda6a9165b8427268555f0e3a3f7d", + "test.recal.mosdepth.global.dist.txt:md5,96d22fe7b6f5824cde06c98c96dec304", + "test.recal.mosdepth.region.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.recal.mosdepth.summary.txt:md5,81c058bd237404d95b3f838bae176303", + "test.recal.per-base.bed.gz:md5,bafb7489cc40a55cfe85f99ea6d36cc7", + "test.recal.per-base.bed.gz.csi:md5,e72e7c7886d42728d785bb97dffd2a07", + "test.recal.regions.bed.gz:md5,d7e8ac59cdfd4cfb70712e699c6848f5", + "test.recal.regions.bed.gz.csi:md5,d9cfb4c9a4ffcf4b0122c378a547162a", + "test:md5,a3290f7539dbbf83777e8590156c0e28", + "test_dis:md5,85205504ea8652ebdee46ac07ee1a8f6" + ], + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T09:57:39.631656166" + } +} diff --git a/tests/variant_calling_msisensorpro.nf.test b/tests/variant_calling_msisensorpro.nf.test new file mode 100644 index 0000000000..7024b1ddc3 --- /dev/null +++ b/tests/variant_calling_msisensorpro.nf.test @@ -0,0 +1,68 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools msisensorpro somatic", + params: [ + genome: null, + igenomes_ignore: true, + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: 
modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + step: "variant_calling", + tools: 'msisensorpro', + wes: true + ] + ], + [ + name: "-profile test --tools msisensorpro somatic --build_only_index --input false", + params: [ + genome: null, + igenomes_ignore: true, + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: false, + build_only_index: true, + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + no_intervals: true, + step: "variant_calling", + tools: 'msisensorpro', + wes: true + ] + ], + [ + name: "-profile test --tools msisensorpro somatic --build_only_index --input false -stub", + params: [ + genome: null, + igenomes_ignore: true, + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: false, + build_only_index: true, + intervals: modules_testdata_base_path + 
'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + no_intervals: true, + step: "variant_calling", + tools: 'msisensorpro', + wes: true + ], + stub: true + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_msisensorpro.nf.test.snap b/tests/variant_calling_msisensorpro.nf.test.snap new file mode 100644 index 0000000000..6325252c84 --- /dev/null +++ b/tests/variant_calling_msisensorpro.nf.test.snap @@ -0,0 +1,247 @@ +{ + "-profile test --tools msisensorpro somatic --build_only_index --input false -stub": { + "content": [ + 4, + { + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "MSISENSORPRO_SCAN": { + "msisensor-pro": "1.3.0" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/.stub", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reference/dict/genome.dict", + "reference/intervals", + "reference/intervals/no_intervals.bed.gz", + "reference/msisensorpro", + "reference/msisensorpro/genome.msisensor_scan.list" + ], + [ + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." 
+ ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-09T10:01:55.42261324" + }, + "-profile test --tools msisensorpro somatic": { + "content": [ + 11, + { + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "MSISENSORPRO_MSISOMATIC": { + "msisensor-pro": "1.3.0" + }, + "MSISENSORPRO_SCAN": { + "msisensor-pro": "1.3.0" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + 
"multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reference/msisensorpro", + "reports", + "reports/mosdepth", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + 
"reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "variant_calling", + "variant_calling/msisensorpro", + "variant_calling/msisensorpro/sample4_vs_sample3", + "variant_calling/msisensorpro/sample4_vs_sample3/sample4_vs_sample3", + "variant_calling/msisensorpro/sample4_vs_sample3/sample4_vs_sample3_dis" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,4ee05f71086179b42a01cd2fb450346f", + "mosdepth-cumcoverage-dist-id.txt:md5,cb7468f51b8be1230fb3ac5b130be31f", + "mosdepth_perchrom.txt:md5,4ee05f71086179b42a01cd2fb450346f", + "multiqc_citations.txt:md5,6239b0f0a9736ccaf3baff3014dd585b", + "samtools-stats-dp.txt:md5,e0a8d8867064083908a8ca1ea782d7ac", + "samtools_alignment_plot.txt:md5,f4b1a7cef760291172144a8614b4a1cd", + "sample3.recal.mosdepth.global.dist.txt:md5,69e29702ef01fd8f6c7a5468fc35a16a", + "sample3.recal.mosdepth.region.dist.txt:md5,6ec49cd7d510c2eb3d9d90fdb79b783a", + "sample3.recal.mosdepth.summary.txt:md5,103098d0bf76ed82d2b87d5f242b099a", + "sample3.recal.per-base.bed.gz:md5,297f96648928d0ca5184223fb9941e7c", + "sample3.recal.per-base.bed.gz.csi:md5,c67dcd711b096eb42f43784d5eadbc0d", + "sample3.recal.regions.bed.gz:md5,314ce8d7273eff353072108aa77c327c", + "sample3.recal.regions.bed.gz.csi:md5,9cb0ad7039a3b703d16ca7d5b835c0ee", + "sample4.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample4.recal.mosdepth.region.dist.txt:md5,39005ffaac22871ffaaf19656fe69c5b", + "sample4.recal.mosdepth.summary.txt:md5,68d4b98f17361fddf73052ead34fa370", + "sample4.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample4.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample4.recal.regions.bed.gz:md5,b7561bc56a955f7db0f11e67e2ec0386", + 
"sample4.recal.regions.bed.gz.csi:md5,393c2749068304d8545b501b9d4658e4", + "sample4_vs_sample3:md5,efc7a09642d444d7475d976c7c8110f4", + "sample4_vs_sample3_dis:md5,780b282473e51808c5ae32b50b4a6406" + ], + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:18:49.269683325" + }, + "-profile test --tools msisensorpro somatic --build_only_index --input false": { + "content": [ + 4, + { + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "MSISENSORPRO_SCAN": { + "msisensor-pro": "1.3.0" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + } + }, + [ + "csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reference/dict/genome.dict", + "reference/intervals", + "reference/intervals/no_intervals.bed.gz", + "reference/msisensorpro", + "reference/msisensorpro/genome.msisensor_scan.list" + ], + [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "genome.dict:md5,de4ff13287d28c1679a11ae989a3a980", + "no_intervals.bed.gz:md5,f3dac01ea66b95fe477446fde2d31489", + "genome.msisensor_scan.list:md5,614754c7f1f44d5988dd80f8f21f69d1" + ], + "No BAM files", + "No CRAM files", + "No VCF files", + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:19:39.916442214" + } +} diff --git a/tests/variant_calling_muse.nf.test 
b/tests/variant_calling_muse.nf.test new file mode 100644 index 0000000000..209b918b45 --- /dev/null +++ b/tests/variant_calling_muse.nf.test @@ -0,0 +1,63 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools muse --input recalibrated_somatic.csv", + params: [ + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + genome: null, + igenomes_ignore: true, + chr_dir: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz', + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + germline_resource: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz', + germline_resource_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + pon: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz', + pon_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz.tbi', + ngscheckmate_bed: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/SNP_GRCh38_hg38_wChr.bed', + nucleotides_per_second: 20, + step: 'variant_calling', + tools: 'muse', + wes: true + 
], + include_muse_txt: true + ], + [ + name: "-profile test --tools muse --input recalibrated_somatic.csv -stub", + params: [ + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + genome: null, + igenomes_ignore: true, + chr_dir: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz', + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + germline_resource: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz', + germline_resource_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + pon: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz', + pon_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz.tbi', + ngscheckmate_bed: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/SNP_GRCh38_hg38_wChr.bed', + nucleotides_per_second: 20, + step: 'variant_calling', + tools: 'muse', + wes: true + ], + stub: true + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_muse.nf.test.snap b/tests/variant_calling_muse.nf.test.snap new file mode 100644 index 0000000000..e8c5d5170d --- /dev/null +++ b/tests/variant_calling_muse.nf.test.snap @@ -0,0 +1,371 @@ +{ + "-profile test --tools 
muse --input recalibrated_somatic.csv -stub": { + "content": [ + 17, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CRAM_TO_BAM": { + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "MUSE_CALL": { + "muse": "2.1.2" + }, + "MUSE_SUMP": { + "bgzip": "1.22.1", + "muse": "2.1.2" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/.stub", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/converted", + "preprocessing/converted/cram_to_bam", + "preprocessing/converted/cram_to_bam/sample3", + "preprocessing/converted/cram_to_bam/sample3/sample3.bam", + "preprocessing/converted/cram_to_bam/sample3/sample3.bam.bai", + "preprocessing/converted/cram_to_bam/sample4", + "preprocessing/converted/cram_to_bam/sample4/sample4.bam", + "preprocessing/converted/cram_to_bam/sample4/sample4.bam.bai", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/muse", + "reports/bcftools/muse/sample4_vs_sample3", + "reports/bcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample3/sample3.recal.per-base.d4", + "reports/mosdepth/sample3/sample3.recal.quantized.bed.gz", + 
"reports/mosdepth/sample3/sample3.recal.quantized.bed.gz.csi", + "reports/mosdepth/sample3/sample3.recal.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample3/sample3.recal.summary.txt", + "reports/mosdepth/sample3/sample3.recal.thresholds.bed.gz", + "reports/mosdepth/sample3/sample3.recal.thresholds.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample4/sample4.recal.per-base.d4", + "reports/mosdepth/sample4/sample4.recal.quantized.bed.gz", + "reports/mosdepth/sample4/sample4.recal.quantized.bed.gz.csi", + "reports/mosdepth/sample4/sample4.recal.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4/sample4.recal.summary.txt", + "reports/mosdepth/sample4/sample4.recal.thresholds.bed.gz", + "reports/mosdepth/sample4/sample4.recal.thresholds.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/muse", + "reports/vcftools/muse/sample4_vs_sample3", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.012", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.012.indv", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.012.pos", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.BEAGLE.GL", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.BEAGLE.PL", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.FILTER.summary", + 
"reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.FORMAT", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.INFO", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.LROH", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.Tajima.D", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.TsTv", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.TsTv.count", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.TsTv.qual", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.TsTv.summary", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.bcf", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.diff.discordance.matrix", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.diff.indv", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.diff.indv_in_files", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.diff.sites", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.diff.sites_in_files", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.diff.switch", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.frq", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.frq.count", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.gdepth", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.geno.chisq", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.geno.ld", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.hap.ld", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.hapcount", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.het", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.hwe", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.idepth", + 
"reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.ifreqburden", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.imiss", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.impute.hap", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.impute.hap.indv", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.impute.hap.legend", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.indel.hist", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.interchrom.geno.ld", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.interchrom.hap.ld", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.kept.sites", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.ldepth", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.ldepth.mean", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.ldhat.locs", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.ldhat.sites", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.list.geno.ld", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.list.hap.ld", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.lmiss", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.lqual", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.map", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.mendel", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.ped", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.relatedness", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.relatedness2", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.removed.sites", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.singletons", + 
"reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.sites.pi", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.snpden", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.tfam", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.tped", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.vcf", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.weir.fst", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.windowed.pi", + "variant_calling", + "variant_calling/muse", + "variant_calling/muse/sample4_vs_sample3", + "variant_calling/muse/sample4_vs_sample3/sample4_vs_sample3.MuSE.txt", + "variant_calling/muse/sample4_vs_sample3/sample4_vs_sample3.muse.vcf.gz", + "variant_calling/muse/sample4_vs_sample3/sample4_vs_sample3.muse.vcf.gz.tbi" + ], + [ + "WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected." + ] + ], + "timestamp": "2026-04-08T12:23:40.799740292", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.2" + } + }, + "-profile test --tools muse --input recalibrated_somatic.csv": { + "content": [ + 18, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CRAM_TO_BAM": { + "samtools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "MUSE_CALL": { + "muse": "2.1.2" + }, + "MUSE_SUMP": { + "bgzip": "1.22.1", + "muse": "2.1.2" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + 
"multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + 
"multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/converted", + "preprocessing/converted/cram_to_bam", + "preprocessing/converted/cram_to_bam/sample3", + "preprocessing/converted/cram_to_bam/sample3/sample3.bam", + "preprocessing/converted/cram_to_bam/sample3/sample3.bam.bai", + "preprocessing/converted/cram_to_bam/sample4", + "preprocessing/converted/cram_to_bam/sample4/sample4.bam", + "preprocessing/converted/cram_to_bam/sample4/sample4.bam.bai", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/muse", + "reports/bcftools/muse/sample4_vs_sample3", + "reports/bcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + 
"reports/mosdepth/sample4/sample4.recal.per-base.bed.gz", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/muse", + "reports/vcftools/muse/sample4_vs_sample3", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.FILTER.summary", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.TsTv.count", + "reports/vcftools/muse/sample4_vs_sample3/sample4_vs_sample3.muse.TsTv.qual", + "variant_calling", + "variant_calling/muse", + "variant_calling/muse/sample4_vs_sample3", + "variant_calling/muse/sample4_vs_sample3/sample4_vs_sample3.MuSE.txt", + "variant_calling/muse/sample4_vs_sample3/sample4_vs_sample3.muse.vcf.gz", + "variant_calling/muse/sample4_vs_sample3/sample4_vs_sample3.muse.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,4ee05f71086179b42a01cd2fb450346f", + "mosdepth-cumcoverage-dist-id.txt:md5,cb7468f51b8be1230fb3ac5b130be31f", + "mosdepth_perchrom.txt:md5,4ee05f71086179b42a01cd2fb450346f", + "multiqc_citations.txt:md5,e49571d90d78a0d1bc919b82d7dc48c6", + "samtools-stats-dp.txt:md5,e0a8d8867064083908a8ca1ea782d7ac", + "samtools_alignment_plot.txt:md5,f4b1a7cef760291172144a8614b4a1cd", + "sample4_vs_sample3.muse.bcftools_stats.txt:md5,09a0d72425a3638cbe8f1cbd254e66f3", + "sample3.recal.mosdepth.global.dist.txt:md5,69e29702ef01fd8f6c7a5468fc35a16a", + "sample3.recal.mosdepth.region.dist.txt:md5,6ec49cd7d510c2eb3d9d90fdb79b783a", + "sample3.recal.mosdepth.summary.txt:md5,103098d0bf76ed82d2b87d5f242b099a", + "sample3.recal.per-base.bed.gz:md5,297f96648928d0ca5184223fb9941e7c", + "sample3.recal.per-base.bed.gz.csi:md5,c67dcd711b096eb42f43784d5eadbc0d", + 
"sample3.recal.regions.bed.gz:md5,314ce8d7273eff353072108aa77c327c", + "sample3.recal.regions.bed.gz.csi:md5,9cb0ad7039a3b703d16ca7d5b835c0ee", + "sample4.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample4.recal.mosdepth.region.dist.txt:md5,39005ffaac22871ffaaf19656fe69c5b", + "sample4.recal.mosdepth.summary.txt:md5,68d4b98f17361fddf73052ead34fa370", + "sample4.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample4.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample4.recal.regions.bed.gz:md5,b7561bc56a955f7db0f11e67e2ec0386", + "sample4.recal.regions.bed.gz.csi:md5,393c2749068304d8545b501b9d4658e4", + "sample4_vs_sample3.muse.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample4_vs_sample3.muse.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f" + ], + [ + "sample3.bam:md5,891da60dc1cc5c6455bfc1442aea0982", + "sample4.bam:md5,36856b3e6dd1bc0f10688b9d8c02faa8" + ], + "No CRAM files", + [ + "sample4_vs_sample3.MuSE.txt:md5,32961f7d718b4a1114f168253e50ef9e" + ], + [ + "sample4_vs_sample3.muse.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No warnings" + ], + "timestamp": "2026-02-09T15:12:56.502933061", + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + } + } +} \ No newline at end of file diff --git a/tests/variant_calling_mutect2.nf.test b/tests/variant_calling_mutect2.nf.test new file mode 100644 index 0000000000..970a6716d6 --- /dev/null +++ b/tests/variant_calling_mutect2.nf.test @@ -0,0 +1,81 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + profile "test,mutect" + + def test_scenario = [ + [ + name: "-profile test --tools mutect2 somatic", + params: [ + genome: null, + igenomes_ignore: true, + dbsnp: modules_testdata_base_path + 
'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + step: "variant_calling", + tools: 'mutect2', + wes: true + ] + ], + [ + name: "-profile test --tools mutect2 somatic --no_intervals", + params: [ + genome: null, + igenomes_ignore: true, + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + no_intervals: true, + step: "variant_calling", + tools: 'mutect2', + wes: true + ] + ], + [ + name: "-profile test --tools mutect2 tumoronly", + params: [ + genome: null, + igenomes_ignore: true, + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 
'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: "${projectDir}/tests/csv/3.0/recalibrated_tumoronly.csv", + step: "variant_calling", + tools: 'mutect2', + wes: true + ] + ], + [ + name: "-profile test --tools mutect2 --no_intervals tumoronly", + params: [ + genome: null, + igenomes_ignore: true, + dbsnp: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz', + dbsnp_tbi: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/dbsnp_138.hg38.vcf.gz.tbi', + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + input: "${projectDir}/tests/csv/3.0/recalibrated_tumoronly.csv", + no_intervals: true, + step: "variant_calling", + tools: 'mutect2', + wes: true + ] + ] + ] + + // Generate tests for each scenario + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_mutect2.nf.test.snap b/tests/variant_calling_mutect2.nf.test.snap new file mode 100644 index 0000000000..d8225f325f --- /dev/null +++ b/tests/variant_calling_mutect2.nf.test.snap @@ -0,0 +1,777 @@ +{ + "-profile test --tools mutect2 --no_intervals tumoronly": { + "content": [ + 11, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "LEARNREADORIENTATIONMODEL": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "MUTECT2": { + "gatk4": "4.6.1.0" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + 
"multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + 
"multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + 
"multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/mutect2", + "reports/bcftools/mutect2/sample2", + "reports/bcftools/mutect2/sample2/sample2.mutect2.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/mutect2", + "reports/vcftools/mutect2/sample2", + "reports/vcftools/mutect2/sample2/sample2.mutect2.FILTER.summary", + "reports/vcftools/mutect2/sample2/sample2.mutect2.TsTv.count", + "reports/vcftools/mutect2/sample2/sample2.mutect2.TsTv.qual", + "variant_calling", + "variant_calling/mutect2", + "variant_calling/mutect2/sample2", + "variant_calling/mutect2/sample2/sample2.mutect2.artifactprior.tar.gz", + "variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz", + 
"variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz.stats", + "variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,a0266cd06dd6f7f5463f09e3cb8af71d", + "mosdepth-cumcoverage-dist-id.txt:md5,88b94dd2dcc423983da65125ece7651e", + "mosdepth_perchrom.txt:md5,a0266cd06dd6f7f5463f09e3cb8af71d", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", + "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", + "sample2.mutect2.bcftools_stats.txt:md5,c275ee76762a37053d43f4e290485af8", + "sample2.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample2.recal.mosdepth.summary.txt:md5,0a7300e56eda6fba7c7564f00aa000f0", + "sample2.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample2.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample2.mutect2.FILTER.summary:md5,08f06620a8dcc70115bdde9137a91008", + "sample2.mutect2.TsTv.count:md5,0d59dcbdb127be60909111958ff7b5f5", + "sample2.mutect2.vcf.gz.stats:md5,76f749c53212d72e98801f6030fbf8a6" + ], + "No BAM files", + "No CRAM files", + [ + "sample2.mutect2.vcf.gz:md5,4a803cf2687fc368a8c4eef5e2fc8c9c" + ], + [ + "WARN: If Mutect2 is specified without a germline resource, no filtering will be done.", + "WARN: No Panel-of-normal was specified for Mutect2." 
+ ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T23:34:19.18363343" + }, + "-profile test --tools mutect2 somatic": { + "content": [ + 15, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "LEARNREADORIENTATIONMODEL": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "MUTECT2_PAIRED": { + "gatk4": "4.6.1.0" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + 
"multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", 
+ "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/mutect2", + "reports/bcftools/mutect2/sample4_vs_sample3", + "reports/bcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample3", + 
"reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/mutect2", + "reports/vcftools/mutect2/sample4_vs_sample3", + "reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.FILTER.summary", + "reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.TsTv.count", + "reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.TsTv.qual", + "variant_calling", + "variant_calling/mutect2", + "variant_calling/mutect2/sample4_vs_sample3", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.artifactprior.tar.gz", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.vcf.gz", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.vcf.gz.stats", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.vcf.gz.tbi" + ], + [ + 
"mosdepth-coverage-per-contig-single.txt:md5,4ee05f71086179b42a01cd2fb450346f", + "mosdepth-cumcoverage-dist-id.txt:md5,cb7468f51b8be1230fb3ac5b130be31f", + "mosdepth_perchrom.txt:md5,4ee05f71086179b42a01cd2fb450346f", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,e0a8d8867064083908a8ca1ea782d7ac", + "samtools_alignment_plot.txt:md5,f4b1a7cef760291172144a8614b4a1cd", + "sample4_vs_sample3.mutect2.bcftools_stats.txt:md5,65bc65858d3dfa1d8913119850626823", + "sample3.recal.mosdepth.global.dist.txt:md5,69e29702ef01fd8f6c7a5468fc35a16a", + "sample3.recal.mosdepth.region.dist.txt:md5,6ec49cd7d510c2eb3d9d90fdb79b783a", + "sample3.recal.mosdepth.summary.txt:md5,103098d0bf76ed82d2b87d5f242b099a", + "sample3.recal.per-base.bed.gz:md5,297f96648928d0ca5184223fb9941e7c", + "sample3.recal.per-base.bed.gz.csi:md5,c67dcd711b096eb42f43784d5eadbc0d", + "sample3.recal.regions.bed.gz:md5,314ce8d7273eff353072108aa77c327c", + "sample3.recal.regions.bed.gz.csi:md5,9cb0ad7039a3b703d16ca7d5b835c0ee", + "sample4.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample4.recal.mosdepth.region.dist.txt:md5,39005ffaac22871ffaaf19656fe69c5b", + "sample4.recal.mosdepth.summary.txt:md5,68d4b98f17361fddf73052ead34fa370", + "sample4.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample4.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample4.recal.regions.bed.gz:md5,b7561bc56a955f7db0f11e67e2ec0386", + "sample4.recal.regions.bed.gz.csi:md5,393c2749068304d8545b501b9d4658e4", + "sample4_vs_sample3.mutect2.FILTER.summary:md5,cac64448be577632a614af62a23af34a", + "sample4_vs_sample3.mutect2.TsTv.count:md5,3739f24da2d2019cc4bc2821e30658eb", + "sample4_vs_sample3.mutect2.vcf.gz.stats:md5,bd657dd9abf6e2354224bb0d20ba181e" + ], + "No BAM files", + "No CRAM files", + [ + "sample4_vs_sample3.mutect2.vcf.gz:md5,f2c46d0dae1b1a59180c0b9e595993d2" + ], + [ + "WARN: If Mutect2 is specified without a germline 
resource, no filtering will be done.", + "WARN: No Panel-of-normal was specified for Mutect2." + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T23:23:51.624986383" + }, + "-profile test --tools mutect2 somatic --no_intervals": { + "content": [ + 13, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "LEARNREADORIENTATIONMODEL": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "MUTECT2_PAIRED": { + "gatk4": "4.6.1.0" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + 
"multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", 
+ "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/mutect2", + "reports/bcftools/mutect2/sample4_vs_sample3", + "reports/bcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.bcftools_stats.txt", + "reports/mosdepth", + 
"reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz", + "reports/mosdepth/sample3/sample3.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz", + "reports/mosdepth/sample4/sample4.recal.per-base.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/mutect2", + "reports/vcftools/mutect2/sample4_vs_sample3", + "reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.FILTER.summary", + "reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.TsTv.count", + "reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.TsTv.qual", + "variant_calling", + "variant_calling/mutect2", + "variant_calling/mutect2/sample4_vs_sample3", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.artifactprior.tar.gz", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.vcf.gz", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.vcf.gz.stats", + "variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,14473da27940bc99c7b9ed36d82f7429", + "mosdepth-cumcoverage-dist-id.txt:md5,35fe7c32ea54f1a69c647202873bb7d7", + "mosdepth_perchrom.txt:md5,14473da27940bc99c7b9ed36d82f7429", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,e0a8d8867064083908a8ca1ea782d7ac", + 
"samtools_alignment_plot.txt:md5,f4b1a7cef760291172144a8614b4a1cd", + "sample4_vs_sample3.mutect2.bcftools_stats.txt:md5,65bc65858d3dfa1d8913119850626823", + "sample3.recal.mosdepth.global.dist.txt:md5,69e29702ef01fd8f6c7a5468fc35a16a", + "sample3.recal.mosdepth.summary.txt:md5,d2775eb102acc5950f7f53883dcb503d", + "sample3.recal.per-base.bed.gz:md5,297f96648928d0ca5184223fb9941e7c", + "sample3.recal.per-base.bed.gz.csi:md5,c67dcd711b096eb42f43784d5eadbc0d", + "sample4.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample4.recal.mosdepth.summary.txt:md5,0a7300e56eda6fba7c7564f00aa000f0", + "sample4.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample4.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample4_vs_sample3.mutect2.FILTER.summary:md5,cac64448be577632a614af62a23af34a", + "sample4_vs_sample3.mutect2.TsTv.count:md5,3739f24da2d2019cc4bc2821e30658eb", + "sample4_vs_sample3.mutect2.vcf.gz.stats:md5,4300e84631ee258660f95e846511d021" + ], + "No BAM files", + "No CRAM files", + [ + "sample4_vs_sample3.mutect2.vcf.gz:md5,f2c46d0dae1b1a59180c0b9e595993d2" + ], + [ + "WARN: If Mutect2 is specified without a germline resource, no filtering will be done.", + "WARN: No Panel-of-normal was specified for Mutect2." 
+ ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T23:27:38.999681674" + }, + "-profile test --tools mutect2 tumoronly": { + "content": [ + 14, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "BUILD_INTERVALS": { + "gawk": "5.3.0" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "LEARNREADORIENTATIONMODEL": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "MUTECT2": { + "gatk4": "4.6.1.0" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + 
"multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + 
"multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reference/dict", + "reports", + "reports/bcftools", + "reports/bcftools/mutect2", + "reports/bcftools/mutect2/sample2", + "reports/bcftools/mutect2/sample2/sample2.mutect2.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample2", + 
"reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz", + "reports/mosdepth/sample2/sample2.recal.per-base.bed.gz.csi", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/mutect2", + "reports/vcftools/mutect2/sample2", + "reports/vcftools/mutect2/sample2/sample2.mutect2.FILTER.summary", + "reports/vcftools/mutect2/sample2/sample2.mutect2.TsTv.count", + "reports/vcftools/mutect2/sample2/sample2.mutect2.TsTv.qual", + "variant_calling", + "variant_calling/mutect2", + "variant_calling/mutect2/sample2", + "variant_calling/mutect2/sample2/sample2.mutect2.artifactprior.tar.gz", + "variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz", + "variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz.stats", + "variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,a0266cd06dd6f7f5463f09e3cb8af71d", + "mosdepth-cumcoverage-dist-id.txt:md5,88b94dd2dcc423983da65125ece7651e", + "mosdepth_perchrom.txt:md5,a0266cd06dd6f7f5463f09e3cb8af71d", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", + "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", + "sample2.mutect2.bcftools_stats.txt:md5,c275ee76762a37053d43f4e290485af8", + "sample2.recal.mosdepth.global.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample2.recal.mosdepth.region.dist.txt:md5,f2dcd00a64947c49e8e4b93c2f4fbf27", + "sample2.recal.mosdepth.summary.txt:md5,b0b47739dcafeeb1a9e6218b8abca1e0", + 
"sample2.recal.per-base.bed.gz:md5,39a1bc436aa8546c26faedbe94cb676c", + "sample2.recal.per-base.bed.gz.csi:md5,cfb07b0ba46e8468b4342edb243536f3", + "sample2.recal.regions.bed.gz:md5,fb0efeba20ea272b7b709cf65246689e", + "sample2.recal.regions.bed.gz.csi:md5,e8452848671e9e5c147ff4cceee944af", + "sample2.mutect2.FILTER.summary:md5,08f06620a8dcc70115bdde9137a91008", + "sample2.mutect2.TsTv.count:md5,0d59dcbdb127be60909111958ff7b5f5", + "sample2.mutect2.vcf.gz.stats:md5,76f749c53212d72e98801f6030fbf8a6" + ], + "No BAM files", + "No CRAM files", + [ + "sample2.mutect2.vcf.gz:md5,4a803cf2687fc368a8c4eef5e2fc8c9c" + ], + [ + "WARN: If Mutect2 is specified without a germline resource, no filtering will be done.", + "WARN: No Panel-of-normal was specified for Mutect2." + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-15T23:30:58.725141411" + } +} diff --git a/tests/variant_calling_sentieon_dnascope.nf.test b/tests/variant_calling_sentieon_dnascope.nf.test new file mode 100644 index 0000000000..1b3740e3da --- /dev/null +++ b/tests/variant_calling_sentieon_dnascope.nf.test @@ -0,0 +1,41 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools sentieon_dnascope", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_bam.csv", + tools: "sentieon_dnascope", + step: "variant_calling" + ] + ], + [ + name: "-profile test --tools sentieon_dnascope --skip_tools dnascope_filter", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_bam.csv", + tools: "sentieon_dnascope", + step: "variant_calling", + skip_tools: "dnascope_filter" + ] + ], + [ + name: "-profile test --tools sentieon_dnascope --joint_germline --sentieon_dnascope_emit_mode gvcf", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_bam.csv", + tools: "sentieon_dnascope", + step: "variant_calling", + joint_germline: true, + 
sentieon_dnascope_emit_mode: "gvcf" + ] + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_sentieon_dnascope.nf.test.snap b/tests/variant_calling_sentieon_dnascope.nf.test.snap new file mode 100644 index 0000000000..d46dce2e7a --- /dev/null +++ b/tests/variant_calling_sentieon_dnascope.nf.test.snap @@ -0,0 +1,557 @@ +{ + "-profile test --tools sentieon_dnascope --skip_tools dnascope_filter": { + "content": [ + 12, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SENTIEON_DNASCOPE": { + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + 
"multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + 
"multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + 
"multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/sentieon_dnascope", + "reports/bcftools/sentieon_dnascope/test", + "reports/bcftools/sentieon_dnascope/test/test.dnascope.unfiltered.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/sentieon_dnascope", + "reports/vcftools/sentieon_dnascope/test", + "reports/vcftools/sentieon_dnascope/test/test.dnascope.unfiltered.FILTER.summary", + "reports/vcftools/sentieon_dnascope/test/test.dnascope.unfiltered.TsTv.count", + "reports/vcftools/sentieon_dnascope/test/test.dnascope.unfiltered.TsTv.qual", + "variant_calling", + "variant_calling/sentieon_dnascope", + "variant_calling/sentieon_dnascope/test", + "variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz", + "variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,115b9a93bcf511419e6622919172f321", + "mosdepth-cumcoverage-dist-id.txt:md5,df2fc825dfc41e30f72f5d125a6447ee", + "mosdepth_perchrom.txt:md5,115b9a93bcf511419e6622919172f321", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,06abce459a3e3192252b6196ae3b189e", + "samtools_alignment_plot.txt:md5,3622ecb71829cc1c581157e403048fd1", + "test.dnascope.unfiltered.bcftools_stats.txt:md5,ccd33b2c34ef09efe1fd7a86474eaa53", + "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + 
"test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "test.dnascope.unfiltered.FILTER.summary:md5,87a84b5f8ac3d3cbeeef7d60afcdbfe7", + "test.dnascope.unfiltered.TsTv.count:md5,b77c120ee5cc0423267200c67d60c663" + ], + "No BAM files", + "No CRAM files", + [ + "test.dnascope.unfiltered.vcf.gz:md5,372f2f8f6588b98ceaaae8ecd21d4f6e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-06-21T12:35:10.846318732" + }, + "-profile test --tools sentieon_dnascope": { + "content": [ + 13, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SENTIEON_DNAMODELAPPLY": { + "sentieon": 202503.01 + }, + "SENTIEON_DNASCOPE": { + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + 
"multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + 
"multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + 
"multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/sentieon_dnascope", + "reports/bcftools/sentieon_dnascope/test", + "reports/bcftools/sentieon_dnascope/test/test.dnascope.filtered.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/sentieon_dnascope", + "reports/vcftools/sentieon_dnascope/test", + "reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.FILTER.summary", + "reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.count", + "reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.qual", + "variant_calling", + "variant_calling/sentieon_dnascope", + "variant_calling/sentieon_dnascope/test", + "variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz", + "variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz.tbi", + "variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz", + "variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,115b9a93bcf511419e6622919172f321", + 
"mosdepth-cumcoverage-dist-id.txt:md5,df2fc825dfc41e30f72f5d125a6447ee", + "mosdepth_perchrom.txt:md5,115b9a93bcf511419e6622919172f321", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,06abce459a3e3192252b6196ae3b189e", + "samtools_alignment_plot.txt:md5,3622ecb71829cc1c581157e403048fd1", + "test.dnascope.filtered.bcftools_stats.txt:md5,ad4472e0b4767e0f3c4052ff3e3c1cdb", + "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "test.dnascope.filtered.FILTER.summary:md5,e67b24d296810a075378e5864bcea0fa", + "test.dnascope.filtered.TsTv.count:md5,b77c120ee5cc0423267200c67d60c663" + ], + "No BAM files", + "No CRAM files", + [ + "test.dnascope.filtered.vcf.gz:md5,3415f9c0cbc1492008b70febe02f3e3e", + "test.dnascope.unfiltered.vcf.gz:md5,372f2f8f6588b98ceaaae8ecd21d4f6e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-06-21T11:44:42.944302645" + }, + "-profile test --tools sentieon_dnascope --joint_germline --sentieon_dnascope_emit_mode gvcf": { + "content": [ + 15, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SENTIEON_DNASCOPE": { + "sentieon": 202503.01 + }, + "SENTIEON_GVCFTYPER": { + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + 
"csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + 
"multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/sentieon_dnascope", + "reports/bcftools/sentieon_dnascope/joint_variant_calling", + "reports/bcftools/sentieon_dnascope/joint_variant_calling/joint_germline.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/sentieon_dnascope", + "reports/vcftools/sentieon_dnascope/joint_variant_calling", + 
"reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline.FILTER.summary", + "reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline.TsTv.count", + "reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline.TsTv.qual", + "variant_calling", + "variant_calling/sentieon_dnascope", + "variant_calling/sentieon_dnascope/joint_variant_calling", + "variant_calling/sentieon_dnascope/joint_variant_calling/joint_germline.vcf.gz", + "variant_calling/sentieon_dnascope/joint_variant_calling/joint_germline.vcf.gz.tbi", + "variant_calling/sentieon_dnascope/test", + "variant_calling/sentieon_dnascope/test/test.dnascope.g.vcf.gz", + "variant_calling/sentieon_dnascope/test/test.dnascope.g.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,115b9a93bcf511419e6622919172f321", + "mosdepth-cumcoverage-dist-id.txt:md5,df2fc825dfc41e30f72f5d125a6447ee", + "mosdepth_perchrom.txt:md5,115b9a93bcf511419e6622919172f321", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,06abce459a3e3192252b6196ae3b189e", + "samtools_alignment_plot.txt:md5,3622ecb71829cc1c581157e403048fd1", + "joint_germline.bcftools_stats.txt:md5,b66f04fbf0ca30785d07a4d0b4156bb7", + "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "joint_germline.FILTER.summary:md5,b489034e69de07875cbf0f5548b4f55e", + "joint_germline.TsTv.count:md5,803d74e40f7716202bae2a3a81c1ddfc" + ], + "No BAM files", + "No CRAM files", + [ + "joint_germline.vcf.gz:md5,ef572f2d4597e009fb10037b736b6b5b", + "test.dnascope.g.vcf.gz:md5,2eec090f63df75523e2c0ecbc369a755" + ], + [ + "WARN: If GATK's Haplotypecaller, Sentieon's Dnascope and/or 
Sentieon's Haplotyper is specified, but without `--dbsnp`, `--known_snps`, `--known_indels` or the associated resource labels (ie `known_snps_vqsr`), no variant recalibration will be done. For recalibration you must provide all of these resources." + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-06-21T16:52:44.930404505" + } +} diff --git a/tests/variant_calling_sentieon_haplotypecaller.nf.test b/tests/variant_calling_sentieon_haplotypecaller.nf.test new file mode 100644 index 0000000000..de94bf0997 --- /dev/null +++ b/tests/variant_calling_sentieon_haplotypecaller.nf.test @@ -0,0 +1,53 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools sentieon_haplotyper", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_bam.csv", + tools: "sentieon_haplotyper", + step: "variant_calling" + ], + no_conda: true + ], + [ + name: "-profile test --tools sentieon_haplotyper --no_intervals", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_bam.csv", + tools: "sentieon_haplotyper", + step: "variant_calling", + no_intervals: true + ], + no_conda: true + ], + [ + name: "-profile test --tools sentieon_haplotyper --skip_tools haplotyper_filter", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_bam.csv", + tools: "sentieon_haplotyper", + step: "variant_calling", + skip_tools: "haplotyper_filter" + ] + ], + [ + name: "-profile test --tools sentieon_haplotyper --joint_germline --sentieon_haplotyper_emit_mode gvcf", + params: [ + input: "${projectDir}/tests/csv/3.0/mapped_single_bam.csv", + tools: "sentieon_haplotyper", + step: "variant_calling", + skip_tools: "haplotyper_filter", + joint_germline: true, + sentieon_haplotyper_emit_mode: "gvcf" + ] + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git 
a/tests/variant_calling_sentieon_haplotypecaller.nf.test.snap b/tests/variant_calling_sentieon_haplotypecaller.nf.test.snap new file mode 100644 index 0000000000..341c182e6d --- /dev/null +++ b/tests/variant_calling_sentieon_haplotypecaller.nf.test.snap @@ -0,0 +1,754 @@ +{ + "-profile test --tools sentieon_haplotyper --joint_germline --sentieon_haplotyper_emit_mode gvcf": { + "content": [ + 19, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SENTIEON_APPLYVARCAL_INDEL": { + "sentieon": 202503.01 + }, + "SENTIEON_GVCFTYPER": { + "sentieon": 202503.01 + }, + "SENTIEON_HAPLOTYPER": { + "sentieon": 202503.01 + }, + "SENTIEON_VARCAL_INDEL": { + "sentieon": 202503.01 + }, + "SENTIEON_VARCAL_SNP": { + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + 
"multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + 
"multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + 
"multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/sentieon_haplotyper", + "reports/bcftools/sentieon_haplotyper/joint_variant_calling", + "reports/bcftools/sentieon_haplotyper/joint_variant_calling/joint_germline_recalibrated_indel.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/sentieon_haplotyper", + "reports/vcftools/sentieon_haplotyper/joint_variant_calling", + "reports/vcftools/sentieon_haplotyper/joint_variant_calling/joint_germline_recalibrated_indel.FILTER.summary", + "reports/vcftools/sentieon_haplotyper/joint_variant_calling/joint_germline_recalibrated_indel.TsTv.count", + "reports/vcftools/sentieon_haplotyper/joint_variant_calling/joint_germline_recalibrated_indel.TsTv.qual", + "variant_calling", + "variant_calling/sentieon_haplotyper", + "variant_calling/sentieon_haplotyper/joint_variant_calling", + "variant_calling/sentieon_haplotyper/joint_variant_calling/joint_germline.vcf.gz", + "variant_calling/sentieon_haplotyper/joint_variant_calling/joint_germline.vcf.gz.tbi", + "variant_calling/sentieon_haplotyper/joint_variant_calling/joint_germline_recalibrated_indel.vcf.gz", + "variant_calling/sentieon_haplotyper/joint_variant_calling/joint_germline_recalibrated_indel.vcf.gz.tbi", + 
"variant_calling/sentieon_haplotyper/test", + "variant_calling/sentieon_haplotyper/test/test.haplotyper.g.vcf.gz", + "variant_calling/sentieon_haplotyper/test/test.haplotyper.g.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,115b9a93bcf511419e6622919172f321", + "mosdepth-cumcoverage-dist-id.txt:md5,df2fc825dfc41e30f72f5d125a6447ee", + "mosdepth_perchrom.txt:md5,115b9a93bcf511419e6622919172f321", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,06abce459a3e3192252b6196ae3b189e", + "samtools_alignment_plot.txt:md5,3622ecb71829cc1c581157e403048fd1", + "joint_germline_recalibrated_indel.bcftools_stats.txt:md5,f223948705dd87d2edcc6a44ca72df64", + "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "joint_germline_recalibrated_indel.FILTER.summary:md5,7e04c3bed9ecbb73aa3c02c2b6b46089", + "joint_germline_recalibrated_indel.TsTv.count:md5,89562fef808b5c3db629682d36fd86fc" + ], + "No BAM files", + "No CRAM files", + [ + "joint_germline.vcf.gz:md5,118f8b58defd673dc9c053da6fcd9129", + "joint_germline_recalibrated_indel.vcf.gz:md5,8133d94724e35be846623fbdb186acc2", + "test.haplotyper.g.vcf.gz:md5,5a556dc23ecea0e9f15c7eeeff563a5f" + ], + [ + "WARN: If GATK's Haplotypecaller, Sentieon's Dnascope and/or Sentieon's Haplotyper is specified, but without `--dbsnp`, `--known_snps`, `--known_indels` or the associated resource labels (ie `known_snps_vqsr`), no variant recalibration will be done. For recalibration you must provide all of these resources." 
+ ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-06-23T20:44:13.797239672" + }, + "-profile test --tools sentieon_haplotyper --no_intervals": { + "content": [ + 11, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CNNSCOREVARIANTS": { + "gatk4": "4.5.0.0" + }, + "FILTERVARIANTTRANCHES": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SENTIEON_HAPLOTYPER": { + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + 
"multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + 
"multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/sentieon_haplotyper", + "reports/bcftools/sentieon_haplotyper/test", + 
"reports/bcftools/sentieon_haplotyper/test/test.haplotyper.filtered.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/sentieon_haplotyper", + "reports/vcftools/sentieon_haplotyper/test", + "reports/vcftools/sentieon_haplotyper/test/test.haplotyper.filtered.FILTER.summary", + "reports/vcftools/sentieon_haplotyper/test/test.haplotyper.filtered.TsTv.count", + "reports/vcftools/sentieon_haplotyper/test/test.haplotyper.filtered.TsTv.qual", + "variant_calling", + "variant_calling/sentieon_haplotyper", + "variant_calling/sentieon_haplotyper/test", + "variant_calling/sentieon_haplotyper/test/test.haplotyper.filtered.vcf.gz", + "variant_calling/sentieon_haplotyper/test/test.haplotyper.filtered.vcf.gz.tbi", + "variant_calling/sentieon_haplotyper/test/test.haplotyper.unfiltered.vcf.gz", + "variant_calling/sentieon_haplotyper/test/test.haplotyper.unfiltered.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,115b9a93bcf511419e6622919172f321", + "mosdepth-cumcoverage-dist-id.txt:md5,df2fc825dfc41e30f72f5d125a6447ee", + "mosdepth_perchrom.txt:md5,115b9a93bcf511419e6622919172f321", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,06abce459a3e3192252b6196ae3b189e", + "samtools_alignment_plot.txt:md5,3622ecb71829cc1c581157e403048fd1", + "test.haplotyper.filtered.bcftools_stats.txt:md5,178ef0aeaef94b9de01a44d833823918", + "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + 
"test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "test.haplotyper.filtered.FILTER.summary:md5,d501a93356f3c91c743f51104e24514a", + "test.haplotyper.filtered.TsTv.count:md5,89562fef808b5c3db629682d36fd86fc" + ], + "No BAM files", + "No CRAM files", + [ + "test.haplotyper.filtered.vcf.gz:md5,a7cb2001286fa02622676547f21f263b", + "test.haplotyper.unfiltered.vcf.gz:md5,722b5007cf143487b8c39d54c20ab77f" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-06-21T14:35:09.570055964" + }, + "-profile test --tools sentieon_haplotyper --skip_tools haplotyper_filter": { + "content": [ + 12, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SENTIEON_HAPLOTYPER": { + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + 
"multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + 
"multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + 
"multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/sentieon_haplotyper", + "reports/bcftools/sentieon_haplotyper/test", + "reports/bcftools/sentieon_haplotyper/test/test.haplotyper.unfiltered.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/sentieon_haplotyper", + "reports/vcftools/sentieon_haplotyper/test", + "reports/vcftools/sentieon_haplotyper/test/test.haplotyper.unfiltered.FILTER.summary", + "reports/vcftools/sentieon_haplotyper/test/test.haplotyper.unfiltered.TsTv.count", + "reports/vcftools/sentieon_haplotyper/test/test.haplotyper.unfiltered.TsTv.qual", + "variant_calling", + "variant_calling/sentieon_haplotyper", + "variant_calling/sentieon_haplotyper/test", + "variant_calling/sentieon_haplotyper/test/test.haplotyper.unfiltered.vcf.gz", + "variant_calling/sentieon_haplotyper/test/test.haplotyper.unfiltered.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,115b9a93bcf511419e6622919172f321", + "mosdepth-cumcoverage-dist-id.txt:md5,df2fc825dfc41e30f72f5d125a6447ee", + 
"mosdepth_perchrom.txt:md5,115b9a93bcf511419e6622919172f321", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,06abce459a3e3192252b6196ae3b189e", + "samtools_alignment_plot.txt:md5,3622ecb71829cc1c581157e403048fd1", + "test.haplotyper.unfiltered.bcftools_stats.txt:md5,a486e123a1466777f49bab8c854f0e55", + "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "test.haplotyper.unfiltered.FILTER.summary:md5,01b3d10464a3ac86f90ee82cdda23f68", + "test.haplotyper.unfiltered.TsTv.count:md5,89562fef808b5c3db629682d36fd86fc" + ], + "No BAM files", + "No CRAM files", + [ + "test.haplotyper.unfiltered.vcf.gz:md5,722b5007cf143487b8c39d54c20ab77f" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-06-21T14:41:55.821821171" + }, + "-profile test --tools sentieon_haplotyper": { + "content": [ + 14, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CNNSCOREVARIANTS": { + "gatk4": "4.5.0.0" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "FILTERVARIANTTRANCHES": { + "gatk4": "4.6.1.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SENTIEON_HAPLOTYPER": { + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + 
"multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + 
"multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + 
"multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/sentieon_haplotyper", + "reports/bcftools/sentieon_haplotyper/test", + "reports/bcftools/sentieon_haplotyper/test/test.haplotyper.filtered.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/test", + "reports/mosdepth/test/test.recal.mosdepth.global.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.region.dist.txt", + "reports/mosdepth/test/test.recal.mosdepth.summary.txt", + "reports/mosdepth/test/test.recal.regions.bed.gz", + "reports/mosdepth/test/test.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/test", + "reports/samtools/test/test.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/sentieon_haplotyper", + "reports/vcftools/sentieon_haplotyper/test", + "reports/vcftools/sentieon_haplotyper/test/test.haplotyper.filtered.FILTER.summary", + "reports/vcftools/sentieon_haplotyper/test/test.haplotyper.filtered.TsTv.count", + "reports/vcftools/sentieon_haplotyper/test/test.haplotyper.filtered.TsTv.qual", + 
"variant_calling", + "variant_calling/sentieon_haplotyper", + "variant_calling/sentieon_haplotyper/test", + "variant_calling/sentieon_haplotyper/test/test.haplotyper.filtered.vcf.gz", + "variant_calling/sentieon_haplotyper/test/test.haplotyper.filtered.vcf.gz.tbi", + "variant_calling/sentieon_haplotyper/test/test.haplotyper.unfiltered.vcf.gz", + "variant_calling/sentieon_haplotyper/test/test.haplotyper.unfiltered.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,115b9a93bcf511419e6622919172f321", + "mosdepth-cumcoverage-dist-id.txt:md5,df2fc825dfc41e30f72f5d125a6447ee", + "mosdepth_perchrom.txt:md5,115b9a93bcf511419e6622919172f321", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,06abce459a3e3192252b6196ae3b189e", + "samtools_alignment_plot.txt:md5,3622ecb71829cc1c581157e403048fd1", + "test.haplotyper.filtered.bcftools_stats.txt:md5,178ef0aeaef94b9de01a44d833823918", + "test.recal.mosdepth.global.dist.txt:md5,bdb8f185c35dd1eec7ce2f69bce57972", + "test.recal.mosdepth.region.dist.txt:md5,f1f1ad86fc280bced1888a5d7d25a3f2", + "test.recal.mosdepth.summary.txt:md5,32ea70ef1b99def3dc900b4afd513a40", + "test.recal.regions.bed.gz:md5,07bbc084a889f1cece4307fd00214a6e", + "test.recal.regions.bed.gz.csi:md5,b3716e5cd1744610e69c29bd4ffad259", + "test.haplotyper.filtered.FILTER.summary:md5,d501a93356f3c91c743f51104e24514a", + "test.haplotyper.filtered.TsTv.count:md5,89562fef808b5c3db629682d36fd86fc" + ], + "No BAM files", + "No CRAM files", + [ + "test.haplotyper.filtered.vcf.gz:md5,a7cb2001286fa02622676547f21f263b", + "test.haplotyper.unfiltered.vcf.gz:md5,722b5007cf143487b8c39d54c20ab77f" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-06-21T14:24:33.689599405" + } +} diff --git a/tests/variant_calling_sentieon_tnscope.nf.test b/tests/variant_calling_sentieon_tnscope.nf.test new file mode 100644 index 0000000000..4709ec3bba --- /dev/null +++ 
b/tests/variant_calling_sentieon_tnscope.nf.test @@ -0,0 +1,51 @@ +def projectDir = new File('.').absolutePath + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --input fastq_pair.csv --tools sentieon_tnscope", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_pair.csv", + aligner: 'sentieon-bwamem', + tools: "sentieon_dedup,sentieon_tnscope", + skip_tools : 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools,bcftools' + ] + ], + [ + name: "-profile test --input fastq_pair.csv --tools sentieon_tnscope --no_intervals", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_pair.csv", + aligner: 'sentieon-bwamem', + tools: "sentieon_dedup,sentieon_tnscope", + skip_tools : 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools,bcftools', + no_intervals: true + ] + ], + [ + name: "-profile test --input fastq_tumor_only.csv --tools sentieon_tnscope", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_tumor_only.csv", + aligner: 'sentieon-bwamem', + tools: "sentieon_dedup,sentieon_tnscope", + skip_tools : 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools,bcftools' + ] + ], + [ + name: "-profile test --input fastq_triple_two_tumor.csv --tools sentieon_tnscope", + params: [ + input: "${projectDir}/tests/csv/3.0/fastq_triple_two_tumor.csv", + aligner: 'sentieon-bwamem', + tools: "sentieon_dedup,sentieon_tnscope", + skip_tools : 'baserecalibrator,fastqc,markduplicates,mosdepth,multiqc,samtools,bcftools' + ] + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_sentieon_tnscope.nf.test.snap b/tests/variant_calling_sentieon_tnscope.nf.test.snap new file mode 100644 index 0000000000..40a45489f8 --- /dev/null +++ b/tests/variant_calling_sentieon_tnscope.nf.test.snap @@ -0,0 +1,377 @@ +{ + "-profile test --input fastq_tumor_only.csv --tools sentieon_tnscope": { + "content": [ + 
11, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "SENTIEON_BWAMEM": { + "bwa": "0.7.17-r1188", + "sentieon": 202503.01 + }, + "SENTIEON_DEDUP": { + "sentieon": 202503.01 + }, + "SENTIEON_TNSCOPE": { + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/markduplicates_no_table.csv", + "csv/variantcalled.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/sentieon_dedup", + "preprocessing/sentieon_dedup/test2", + "preprocessing/sentieon_dedup/test2/test2.dedup.cram", + "preprocessing/sentieon_dedup/test2/test2.dedup.cram.crai", + "reference", + "reports", + "reports/sentieon_dedup", + "reports/sentieon_dedup/test2", + "reports/sentieon_dedup/test2/test2.dedup.cram.metrics", + "reports/sentieon_dedup/test2/test2.dedup.cram.metrics.multiqc.tsv", + "reports/vcftools", + "reports/vcftools/sentieon_tnscope", + "reports/vcftools/sentieon_tnscope/test2", + "reports/vcftools/sentieon_tnscope/test2/test2.tnscope.FILTER.summary", + "reports/vcftools/sentieon_tnscope/test2/test2.tnscope.TsTv.count", + "reports/vcftools/sentieon_tnscope/test2/test2.tnscope.TsTv.qual", + "variant_calling", + "variant_calling/sentieon_tnscope", + "variant_calling/sentieon_tnscope/test2", + "variant_calling/sentieon_tnscope/test2/test2.tnscope.vcf.gz", + "variant_calling/sentieon_tnscope/test2/test2.tnscope.vcf.gz.tbi" + ], + [ + "test2.tnscope.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "test2.tnscope.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a" + ], + "No BAM files", + [ + "test2.dedup.cram:md5,bf76728a0d29b246f7fb240ec15a52fc" + ], + [ + 
"test2.tnscope.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998513#1/1" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-07-03T13:19:32.295689002" + }, + "-profile test --input fastq_triple_two_tumor.csv --tools sentieon_tnscope": { + "content": [ + 19, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "SENTIEON_BWAMEM": { + "bwa": "0.7.17-r1188", + "sentieon": 202503.01 + }, + "SENTIEON_DEDUP": { + "sentieon": 202503.01 + }, + "SENTIEON_TNSCOPE": { + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/markduplicates_no_table.csv", + "csv/variantcalled.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/sentieon_dedup", + "preprocessing/sentieon_dedup/test", + "preprocessing/sentieon_dedup/test/test.dedup.cram", + "preprocessing/sentieon_dedup/test/test.dedup.cram.crai", + "preprocessing/sentieon_dedup/test2", + "preprocessing/sentieon_dedup/test2/test2.dedup.cram", + "preprocessing/sentieon_dedup/test2/test2.dedup.cram.crai", + "preprocessing/sentieon_dedup/test3", + "preprocessing/sentieon_dedup/test3/test3.dedup.cram", + "preprocessing/sentieon_dedup/test3/test3.dedup.cram.crai", + "reference", + "reports", + "reports/sentieon_dedup", + "reports/sentieon_dedup/test", + "reports/sentieon_dedup/test/test.dedup.cram.metrics", + "reports/sentieon_dedup/test/test.dedup.cram.metrics.multiqc.tsv", + "reports/sentieon_dedup/test2", + 
"reports/sentieon_dedup/test2/test2.dedup.cram.metrics", + "reports/sentieon_dedup/test2/test2.dedup.cram.metrics.multiqc.tsv", + "reports/sentieon_dedup/test3", + "reports/sentieon_dedup/test3/test3.dedup.cram.metrics", + "reports/sentieon_dedup/test3/test3.dedup.cram.metrics.multiqc.tsv", + "reports/vcftools", + "reports/vcftools/sentieon_tnscope", + "reports/vcftools/sentieon_tnscope/test2_vs_test", + "reports/vcftools/sentieon_tnscope/test2_vs_test/test2_vs_test.tnscope.FILTER.summary", + "reports/vcftools/sentieon_tnscope/test2_vs_test/test2_vs_test.tnscope.TsTv.count", + "reports/vcftools/sentieon_tnscope/test2_vs_test/test2_vs_test.tnscope.TsTv.qual", + "reports/vcftools/sentieon_tnscope/test3_vs_test", + "reports/vcftools/sentieon_tnscope/test3_vs_test/test3_vs_test.tnscope.FILTER.summary", + "reports/vcftools/sentieon_tnscope/test3_vs_test/test3_vs_test.tnscope.TsTv.count", + "reports/vcftools/sentieon_tnscope/test3_vs_test/test3_vs_test.tnscope.TsTv.qual", + "variant_calling", + "variant_calling/sentieon_tnscope", + "variant_calling/sentieon_tnscope/test2_vs_test", + "variant_calling/sentieon_tnscope/test2_vs_test/test2_vs_test.tnscope.vcf.gz", + "variant_calling/sentieon_tnscope/test2_vs_test/test2_vs_test.tnscope.vcf.gz.tbi", + "variant_calling/sentieon_tnscope/test3_vs_test", + "variant_calling/sentieon_tnscope/test3_vs_test/test3_vs_test.tnscope.vcf.gz", + "variant_calling/sentieon_tnscope/test3_vs_test/test3_vs_test.tnscope.vcf.gz.tbi" + ], + [ + "test2_vs_test.tnscope.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "test2_vs_test.tnscope.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f", + "test3_vs_test.tnscope.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "test3_vs_test.tnscope.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f" + ], + "No BAM files", + [ + "test.dedup.cram:md5,5a5741428f717a28533ff2e1c7cdb38d", + "test2.dedup.cram:md5,bf76728a0d29b246f7fb240ec15a52fc", + "test3.dedup.cram:md5,c993f2cf336031851fb0a092f54d7d0e" + ], + 
[ + "test2_vs_test.tnscope.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "test3_vs_test.tnscope.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test.umi_1.fastq.gz): Cannot extract flowcell ID from @922332/1", + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998513#1/1", + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-07-03T19:24:18.564064846" + }, + "-profile test --input fastq_pair.csv --tools sentieon_tnscope --no_intervals": { + "content": [ + 10, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "SENTIEON_BWAMEM": { + "bwa": "0.7.17-r1188", + "sentieon": 202503.01 + }, + "SENTIEON_DEDUP": { + "sentieon": 202503.01 + }, + "SENTIEON_TNSCOPE": { + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/markduplicates_no_table.csv", + "csv/variantcalled.csv", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/sentieon_dedup", + "preprocessing/sentieon_dedup/test", + "preprocessing/sentieon_dedup/test/test.dedup.cram", + "preprocessing/sentieon_dedup/test/test.dedup.cram.crai", + "preprocessing/sentieon_dedup/test2", + "preprocessing/sentieon_dedup/test2/test2.dedup.cram", + "preprocessing/sentieon_dedup/test2/test2.dedup.cram.crai", + "reference", + "reports", + "reports/sentieon_dedup", + "reports/sentieon_dedup/test", + 
"reports/sentieon_dedup/test/test.dedup.cram.metrics", + "reports/sentieon_dedup/test/test.dedup.cram.metrics.multiqc.tsv", + "reports/sentieon_dedup/test2", + "reports/sentieon_dedup/test2/test2.dedup.cram.metrics", + "reports/sentieon_dedup/test2/test2.dedup.cram.metrics.multiqc.tsv", + "reports/vcftools", + "reports/vcftools/sentieon_tnscope", + "reports/vcftools/sentieon_tnscope/test2_vs_test", + "reports/vcftools/sentieon_tnscope/test2_vs_test/test2_vs_test.tnscope.FILTER.summary", + "reports/vcftools/sentieon_tnscope/test2_vs_test/test2_vs_test.tnscope.TsTv.count", + "reports/vcftools/sentieon_tnscope/test2_vs_test/test2_vs_test.tnscope.TsTv.qual", + "variant_calling", + "variant_calling/sentieon_tnscope", + "variant_calling/sentieon_tnscope/test2_vs_test", + "variant_calling/sentieon_tnscope/test2_vs_test/test2_vs_test.tnscope.vcf.gz", + "variant_calling/sentieon_tnscope/test2_vs_test/test2_vs_test.tnscope.vcf.gz.tbi" + ], + [ + "test2_vs_test.tnscope.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "test2_vs_test.tnscope.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f" + ], + "No BAM files", + [ + "test.dedup.cram:md5,5a5741428f717a28533ff2e1c7cdb38d", + "test2.dedup.cram:md5,bf76728a0d29b246f7fb240ec15a52fc" + ], + [ + "test2_vs_test.tnscope.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998513#1/1", + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-07-03T13:12:22.769869621" + }, + "-profile test --input fastq_pair.csv --tools sentieon_tnscope": { + "content": [ + 13, + { + "BWAMEM1_INDEX": { + "bwa": "0.7.18-r1243-dirty" + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + 
"GATK4_INTERVALLISTTOBED": { + "gatk4": "4.6.1.0" + }, + "SENTIEON_BWAMEM": { + "bwa": "0.7.17-r1188", + "sentieon": 202503.01 + }, + "SENTIEON_DEDUP": { + "sentieon": 202503.01 + }, + "SENTIEON_TNSCOPE": { + "sentieon": 202503.01 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/markduplicates_no_table.csv", + "csv/variantcalled.csv", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "preprocessing", + "preprocessing/sentieon_dedup", + "preprocessing/sentieon_dedup/test", + "preprocessing/sentieon_dedup/test/test.dedup.cram", + "preprocessing/sentieon_dedup/test/test.dedup.cram.crai", + "preprocessing/sentieon_dedup/test2", + "preprocessing/sentieon_dedup/test2/test2.dedup.cram", + "preprocessing/sentieon_dedup/test2/test2.dedup.cram.crai", + "reference", + "reports", + "reports/sentieon_dedup", + "reports/sentieon_dedup/test", + "reports/sentieon_dedup/test/test.dedup.cram.metrics", + "reports/sentieon_dedup/test/test.dedup.cram.metrics.multiqc.tsv", + "reports/sentieon_dedup/test2", + "reports/sentieon_dedup/test2/test2.dedup.cram.metrics", + "reports/sentieon_dedup/test2/test2.dedup.cram.metrics.multiqc.tsv", + "reports/vcftools", + "reports/vcftools/sentieon_tnscope", + "reports/vcftools/sentieon_tnscope/test2_vs_test", + "reports/vcftools/sentieon_tnscope/test2_vs_test/test2_vs_test.tnscope.FILTER.summary", + "reports/vcftools/sentieon_tnscope/test2_vs_test/test2_vs_test.tnscope.TsTv.count", + "reports/vcftools/sentieon_tnscope/test2_vs_test/test2_vs_test.tnscope.TsTv.qual", + "variant_calling", + "variant_calling/sentieon_tnscope", + "variant_calling/sentieon_tnscope/test2_vs_test", + "variant_calling/sentieon_tnscope/test2_vs_test/test2_vs_test.tnscope.vcf.gz", + 
"variant_calling/sentieon_tnscope/test2_vs_test/test2_vs_test.tnscope.vcf.gz.tbi" + ], + [ + "test2_vs_test.tnscope.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "test2_vs_test.tnscope.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f" + ], + "No BAM files", + [ + "test.dedup.cram:md5,5a5741428f717a28533ff2e1c7cdb38d", + "test2.dedup.cram:md5,bf76728a0d29b246f7fb240ec15a52fc" + ], + [ + "test2_vs_test.tnscope.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998513#1/1", + "WARN: FASTQ file(/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz): Cannot extract flowcell ID from @normal#21#998579#1/1" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-07-03T13:39:14.931074918" + } +} diff --git a/tests/variant_calling_strelka.nf.test b/tests/variant_calling_strelka.nf.test new file mode 100644 index 0000000000..a91577ca3e --- /dev/null +++ b/tests/variant_calling_strelka.nf.test @@ -0,0 +1,73 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools strelka --only_paired_variant_calling", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated.csv", + step: "variant_calling", + tools: 'strelka', + only_paired_variant_calling: true + ] + ], + [ + name: "-profile test --tools strelka 
germline", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_germline.csv", + step: "variant_calling", + tools: 'strelka' + ] + ], + [ + name: "-profile test --tools strelka --no_intervals germline", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_germline.csv", + no_intervals: true, + step: "variant_calling", + tools: 'strelka' + ] + ], + [ + name: "-profile test --tools strelka somatic", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + step: "variant_calling", + tools: 'strelka' + ] + ], + [ + name: "-profile test --tools strelka --no_intervals somatic", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + no_intervals: true, + step: "variant_calling", 
+ tools: 'strelka' + ] + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_strelka.nf.test.snap b/tests/variant_calling_strelka.nf.test.snap new file mode 100644 index 0000000000..50bc1b73d2 --- /dev/null +++ b/tests/variant_calling_strelka.nf.test.snap @@ -0,0 +1,1029 @@ +{ + "-profile test --tools strelka somatic": { + "content": [ + 22, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "STRELKA_SOMATIC": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + 
"multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + 
"multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + 
"pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/strelka", + "reports/bcftools/strelka/sample3", + "reports/bcftools/strelka/sample3/sample3.strelka.variants.bcftools_stats.txt", + "reports/bcftools/strelka/sample4_vs_sample3", + "reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt", + "reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/sample3", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.count", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/sample4_vs_sample3", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary", + 
"reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/sample3", + "variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz", + "variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz", + "variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz.tbi", + "variant_calling/strelka/sample4_vs_sample3", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz.tbi", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,336d786b273c4d4e714d51c44207ff56", + "mosdepth-cumcoverage-dist-id.txt:md5,0cc87c596a0fc30ebb676c4587e986de", + "mosdepth_perchrom.txt:md5,336d786b273c4d4e714d51c44207ff56", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,e0a8d8867064083908a8ca1ea782d7ac", + "samtools_alignment_plot.txt:md5,f4b1a7cef760291172144a8614b4a1cd", + "sample3.strelka.variants.bcftools_stats.txt:md5,6d4d032ba146941cb226765aaed9d67f", + "sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt:md5,62c6123f6494c3cdbd42dc7230e757b3", + 
"sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt:md5,8404ea88658fbc41d447ba20bf46dd0a", + "sample3.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample3.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample3.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample3.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample3.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample4.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample4.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample4.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample4.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample4.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample3.strelka.variants.FILTER.summary:md5,fef8aeadd3b0f3b8c040c0da03bf1cbd", + "sample3.strelka.variants.TsTv.count:md5,c5b7a8eda2526d899098439ae4c06a49", + "sample4_vs_sample3.strelka.somatic_indels.FILTER.summary:md5,30a45e2bc87f40c89388032cbf75ec65", + "sample4_vs_sample3.strelka.somatic_indels.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f", + "sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary:md5,4fc17fa5625b4d1dcc5d791b1eb22d85", + "sample4_vs_sample3.strelka.somatic_snvs.TsTv.count:md5,fc7af1f534890c4ad3025588b3af62ae" + ], + "No BAM files", + "No CRAM files", + [ + "sample3.strelka.genome.vcf.gz:md5,aac21bf1449d3c7361c14054b08f8ed1", + "sample3.strelka.variants.vcf.gz:md5,dbea8798de92494c7a1fbf686c363696", + "sample4_vs_sample3.strelka.somatic_indels.vcf.gz:md5,6b8a01b52186fcaf3ec74e5bf1b5426", + "sample4_vs_sample3.strelka.somatic_snvs.vcf.gz:md5,866236c526138e4e08fdc2fa87b08417" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:48:23.853303187" + }, + "-profile test --tools strelka germline": { + "content": [ + 11, + { + 
"BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + 
"multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/strelka", + "reports/bcftools/strelka/sample1", + "reports/bcftools/strelka/sample1/sample1.strelka.variants.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample1", + "reports/mosdepth/sample1/sample1.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.summary.txt", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample1", + "reports/samtools/sample1/sample1.recal.cram.stats", + 
"reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/sample1", + "reports/vcftools/strelka/sample1/sample1.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.count", + "reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/sample1", + "variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz", + "variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/sample1/sample1.strelka.variants.vcf.gz", + "variant_calling/strelka/sample1/sample1.strelka.variants.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,49fd828d21d79a5c9430cdb30d7f0126", + "mosdepth-cumcoverage-dist-id.txt:md5,edd8dfc8023e629e476e01c3a1448d30", + "mosdepth_perchrom.txt:md5,49fd828d21d79a5c9430cdb30d7f0126", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,41d36c1a8413d565788a1a364b467606", + "samtools_alignment_plot.txt:md5,35f8fd2a557568b2237193f46afbab5c", + "sample1.strelka.variants.bcftools_stats.txt:md5,7d091579d450a6f6d6e6ed9795dce0cb", + "sample1.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample1.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample1.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample1.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample1.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample1.strelka.variants.FILTER.summary:md5,fef8aeadd3b0f3b8c040c0da03bf1cbd", + "sample1.strelka.variants.TsTv.count:md5,c5b7a8eda2526d899098439ae4c06a49" + ], + "No BAM files", + "No CRAM files", + [ + "sample1.strelka.genome.vcf.gz:md5,aac21bf1449d3c7361c14054b08f8ed1", + "sample1.strelka.variants.vcf.gz:md5,dbea8798de92494c7a1fbf686c363696" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + 
"nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:45:18.036242508" + }, + "-profile test --tools strelka --only_paired_variant_calling": { + "content": [ + 26, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "STRELKA_SOMATIC": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + 
"multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + 
"multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/strelka", + "reports/bcftools/strelka/sample1", + "reports/bcftools/strelka/sample1/sample1.strelka.variants.bcftools_stats.txt", + 
"reports/bcftools/strelka/sample4_vs_sample3", + "reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt", + "reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample1", + "reports/mosdepth/sample1/sample1.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.summary.txt", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz.csi", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample1", + "reports/samtools/sample1/sample1.recal.cram.stats", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + 
"reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/sample1", + "reports/vcftools/strelka/sample1/sample1.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.count", + "reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/sample4_vs_sample3", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/sample1", + "variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz", + "variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/sample1/sample1.strelka.variants.vcf.gz", + "variant_calling/strelka/sample1/sample1.strelka.variants.vcf.gz.tbi", + "variant_calling/strelka/sample4_vs_sample3", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz.tbi", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,73ef9a077df1887f9021a581fbf207bc", + 
"mosdepth-cumcoverage-dist-id.txt:md5,ad0637d55d7025330f2f6cb7f9680e64", + "mosdepth_perchrom.txt:md5,73ef9a077df1887f9021a581fbf207bc", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,b446a47b182d93a9e7e74f5f7c8d41c2", + "samtools_alignment_plot.txt:md5,7138a2d29f515993e1df8d745e27b757", + "sample1.strelka.variants.bcftools_stats.txt:md5,7d091579d450a6f6d6e6ed9795dce0cb", + "sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt:md5,62c6123f6494c3cdbd42dc7230e757b3", + "sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt:md5,8404ea88658fbc41d447ba20bf46dd0a", + "sample1.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample1.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample1.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample1.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample1.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample2.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample2.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample2.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample2.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample2.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample3.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample3.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample3.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample3.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample3.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample4.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample4.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + 
"sample4.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample4.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample4.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample1.strelka.variants.FILTER.summary:md5,fef8aeadd3b0f3b8c040c0da03bf1cbd", + "sample1.strelka.variants.TsTv.count:md5,c5b7a8eda2526d899098439ae4c06a49", + "sample4_vs_sample3.strelka.somatic_indels.FILTER.summary:md5,30a45e2bc87f40c89388032cbf75ec65", + "sample4_vs_sample3.strelka.somatic_indels.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f", + "sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary:md5,4fc17fa5625b4d1dcc5d791b1eb22d85", + "sample4_vs_sample3.strelka.somatic_snvs.TsTv.count:md5,fc7af1f534890c4ad3025588b3af62ae" + ], + "No BAM files", + "No CRAM files", + [ + "sample1.strelka.genome.vcf.gz:md5,aac21bf1449d3c7361c14054b08f8ed1", + "sample1.strelka.variants.vcf.gz:md5,dbea8798de92494c7a1fbf686c363696", + "sample4_vs_sample3.strelka.somatic_indels.vcf.gz:md5,6b8a01b52186fcaf3ec74e5bf1b5426", + "sample4_vs_sample3.strelka.somatic_snvs.vcf.gz:md5,866236c526138e4e08fdc2fa87b08417" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:43:48.55687233" + }, + "-profile test --tools strelka --no_intervals germline": { + "content": [ + 9, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", 
+ "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + 
"multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + 
"multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/strelka", + "reports/bcftools/strelka/sample1", + "reports/bcftools/strelka/sample1/sample1.strelka.variants.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample1", + "reports/mosdepth/sample1/sample1.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.summary.txt", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample1", + "reports/samtools/sample1/sample1.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/sample1", + "reports/vcftools/strelka/sample1/sample1.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.count", + "reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.qual", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/sample1", + "variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz", + "variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/sample1/sample1.strelka.variants.vcf.gz", + "variant_calling/strelka/sample1/sample1.strelka.variants.vcf.gz.tbi" + ], + [ + 
"mosdepth-coverage-per-contig-single.txt:md5,49fd828d21d79a5c9430cdb30d7f0126", + "mosdepth-cumcoverage-dist-id.txt:md5,edd8dfc8023e629e476e01c3a1448d30", + "mosdepth_perchrom.txt:md5,49fd828d21d79a5c9430cdb30d7f0126", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,41d36c1a8413d565788a1a364b467606", + "samtools_alignment_plot.txt:md5,35f8fd2a557568b2237193f46afbab5c", + "sample1.strelka.variants.bcftools_stats.txt:md5,a125b261633ee5e73c4c0bfead86c77c", + "sample1.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample1.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample1.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample1.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample1.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample1.strelka.variants.FILTER.summary:md5,8697a0a983314e98b99b5f6038af65f6", + "sample1.strelka.variants.TsTv.count:md5,1481854d2a765f5641856ecf95ca4097" + ], + "No BAM files", + "No CRAM files", + [ + "sample1.strelka.genome.vcf.gz:md5,37d003dfa8685cac4791cbde1af9b92d", + "sample1.strelka.variants.vcf.gz:md5,6e7f36df8e96c983ef075d376431d72a" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:46:48.703134861" + }, + "-profile test --tools strelka --no_intervals somatic": { + "content": [ + 20, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "STRELKA_SOMATIC": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + 
"multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + 
"multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + 
"multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/strelka", + "reports/bcftools/strelka/sample3", + "reports/bcftools/strelka/sample3/sample3.strelka.variants.bcftools_stats.txt", + "reports/bcftools/strelka/sample4_vs_sample3", + "reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt", + "reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + 
"reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/strelka", + "reports/vcftools/strelka/sample3", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.count", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/sample4_vs_sample3", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual", + "variant_calling", + "variant_calling/strelka", + "variant_calling/strelka/sample3", + "variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz", + "variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz", + "variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz.tbi", + "variant_calling/strelka/sample4_vs_sample3", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz", + 
"variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz.tbi", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,336d786b273c4d4e714d51c44207ff56", + "mosdepth-cumcoverage-dist-id.txt:md5,0cc87c596a0fc30ebb676c4587e986de", + "mosdepth_perchrom.txt:md5,336d786b273c4d4e714d51c44207ff56", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,e0a8d8867064083908a8ca1ea782d7ac", + "samtools_alignment_plot.txt:md5,f4b1a7cef760291172144a8614b4a1cd", + "sample3.strelka.variants.bcftools_stats.txt:md5,322c544c624565a9ab0e128bca556d81", + "sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt:md5,62c6123f6494c3cdbd42dc7230e757b3", + "sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt:md5,eec9d410eb24068b9a67be417c41d54e", + "sample3.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample3.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample3.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample3.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample3.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample4.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample4.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample4.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample4.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample4.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample3.strelka.variants.FILTER.summary:md5,8697a0a983314e98b99b5f6038af65f6", + "sample3.strelka.variants.TsTv.count:md5,1481854d2a765f5641856ecf95ca4097", + 
"sample4_vs_sample3.strelka.somatic_indels.FILTER.summary:md5,30a45e2bc87f40c89388032cbf75ec65", + "sample4_vs_sample3.strelka.somatic_indels.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f", + "sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary:md5,7a81b11aa29fec73d5bc872b7b58f8aa", + "sample4_vs_sample3.strelka.somatic_snvs.TsTv.count:md5,a922c51ca3b2ea7cdcfa09e9c8c55d52" + ], + "No BAM files", + "No CRAM files", + [ + "sample3.strelka.genome.vcf.gz:md5,37d003dfa8685cac4791cbde1af9b92d", + "sample3.strelka.variants.vcf.gz:md5,6e7f36df8e96c983ef075d376431d72a", + "sample4_vs_sample3.strelka.somatic_indels.vcf.gz:md5,6b8a01b52186fcaf3ec74e5bf1b5426", + "sample4_vs_sample3.strelka.somatic_snvs.vcf.gz:md5,262dcc485506abb0b67c8235354fd6e3" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:49:57.22542474" + } +} diff --git a/tests/variant_calling_strelka_bp.nf.test b/tests/variant_calling_strelka_bp.nf.test new file mode 100644 index 0000000000..f5eb0cf6d1 --- /dev/null +++ b/tests/variant_calling_strelka_bp.nf.test @@ -0,0 +1,38 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools manta,strelka somatic", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + step: "variant_calling", + tools: 'manta,strelka' + ] + ], + [ + name: "-profile test --tools manta,strelka --no_intervals somatic", + params: [ + fasta: modules_testdata_base_path + 
'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + no_intervals: true, + step: "variant_calling", + tools: 'manta,strelka' + ] + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_strelka_bp.nf.test.snap b/tests/variant_calling_strelka_bp.nf.test.snap new file mode 100644 index 0000000000..a4c74d103b --- /dev/null +++ b/tests/variant_calling_strelka_bp.nf.test.snap @@ -0,0 +1,526 @@ +{ + "-profile test --tools manta,strelka --no_intervals somatic": { + "content": [ + 34, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "MANTA_GERMLINE": { + "manta": "1.6.0" + }, + "MANTA_SOMATIC": { + "manta": "1.6.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "STRELKA_SOMATIC": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + 
"multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + 
"multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + 
"multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "no_intervals.bed", + "no_intervals.bed.gz", + "no_intervals.bed.gz.tbi", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/manta", + "reports/bcftools/manta/sample3", + "reports/bcftools/manta/sample3/sample3.manta.diploid_sv.bcftools_stats.txt", + "reports/bcftools/manta/sample4_vs_sample3", + "reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt", + "reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt", + "reports/bcftools/strelka", + "reports/bcftools/strelka/sample3", + "reports/bcftools/strelka/sample3/sample3.strelka.variants.bcftools_stats.txt", + "reports/bcftools/strelka/sample4_vs_sample3", + "reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt", + "reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + 
"reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/manta", + "reports/vcftools/manta/sample3", + "reports/vcftools/manta/sample3/sample3.manta.diploid_sv.FILTER.summary", + "reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.count", + "reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.qual", + "reports/vcftools/manta/sample4_vs_sample3", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.FILTER.summary", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.count", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.qual", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.FILTER.summary", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.count", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.qual", + "reports/vcftools/strelka", + "reports/vcftools/strelka/sample3", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.FILTER.summary", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.count", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/sample4_vs_sample3", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary", + 
"reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual", + "variant_calling", + "variant_calling/manta", + "variant_calling/manta/sample3", + "variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz", + "variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz.tbi", + "variant_calling/manta/sample4_vs_sample3", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz.tbi", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz.tbi", + "variant_calling/strelka", + "variant_calling/strelka/sample3", + "variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz", + "variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz", + "variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz.tbi", + "variant_calling/strelka/sample4_vs_sample3", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz.tbi", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,336d786b273c4d4e714d51c44207ff56", + "mosdepth-cumcoverage-dist-id.txt:md5,0cc87c596a0fc30ebb676c4587e986de", + "mosdepth_perchrom.txt:md5,336d786b273c4d4e714d51c44207ff56", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,e0a8d8867064083908a8ca1ea782d7ac", + 
"samtools_alignment_plot.txt:md5,f4b1a7cef760291172144a8614b4a1cd", + "sample3.manta.diploid_sv.bcftools_stats.txt:md5,36a838390faba81e3eabf5ac8a093a4a", + "sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt:md5,f00cf810d34ef7e5c7980f7039bb4446", + "sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt:md5,7af2ea2e84154ddf2a483b1bd1f0646c", + "sample3.strelka.variants.bcftools_stats.txt:md5,322c544c624565a9ab0e128bca556d81", + "sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt:md5,62c6123f6494c3cdbd42dc7230e757b3", + "sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt:md5,eec9d410eb24068b9a67be417c41d54e", + "sample3.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample3.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample3.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample3.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample3.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample4.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample4.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample4.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample4.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample4.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample3.manta.diploid_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample3.manta.diploid_sv.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + "sample4_vs_sample3.manta.diploid_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample4_vs_sample3.manta.diploid_sv.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + "sample4_vs_sample3.manta.somatic_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample4_vs_sample3.manta.somatic_sv.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f", + 
"sample3.strelka.variants.FILTER.summary:md5,8697a0a983314e98b99b5f6038af65f6", + "sample3.strelka.variants.TsTv.count:md5,1481854d2a765f5641856ecf95ca4097", + "sample4_vs_sample3.strelka.somatic_indels.FILTER.summary:md5,30a45e2bc87f40c89388032cbf75ec65", + "sample4_vs_sample3.strelka.somatic_indels.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f", + "sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary:md5,7a81b11aa29fec73d5bc872b7b58f8aa", + "sample4_vs_sample3.strelka.somatic_snvs.TsTv.count:md5,a922c51ca3b2ea7cdcfa09e9c8c55d52" + ], + "No BAM files", + "No CRAM files", + [ + "sample3.manta.diploid_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample4_vs_sample3.manta.diploid_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample4_vs_sample3.manta.somatic_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample3.strelka.genome.vcf.gz:md5,37d003dfa8685cac4791cbde1af9b92d", + "sample3.strelka.variants.vcf.gz:md5,6e7f36df8e96c983ef075d376431d72a", + "sample4_vs_sample3.strelka.somatic_indels.vcf.gz:md5,6b8a01b52186fcaf3ec74e5bf1b5426", + "sample4_vs_sample3.strelka.somatic_snvs.vcf.gz:md5,262dcc485506abb0b67c8235354fd6e3" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:53:32.414589191" + }, + "-profile test --tools manta,strelka somatic": { + "content": [ + 36, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MANTA_GERMLINE": { + "manta": "1.6.0" + }, + "MANTA_SOMATIC": { + "manta": "1.6.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "STRELKA_SINGLE": { + "strelka": "2.9.10" + }, + "STRELKA_SOMATIC": { + "strelka": "2.9.10" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + 
"csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_variant_depths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_variant_depths.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_qual.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_variant_depths.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_qual.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + 
"multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_variant_depths.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_qual.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/manta", + "reports/bcftools/manta/sample3", + "reports/bcftools/manta/sample3/sample3.manta.diploid_sv.bcftools_stats.txt", + "reports/bcftools/manta/sample4_vs_sample3", + "reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt", + "reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt", + "reports/bcftools/strelka", + "reports/bcftools/strelka/sample3", + "reports/bcftools/strelka/sample3/sample3.strelka.variants.bcftools_stats.txt", + "reports/bcftools/strelka/sample4_vs_sample3", + "reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt", + "reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt", + "reports/mosdepth", + 
"reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/manta", + "reports/vcftools/manta/sample3", + "reports/vcftools/manta/sample3/sample3.manta.diploid_sv.FILTER.summary", + "reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.count", + "reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.qual", + "reports/vcftools/manta/sample4_vs_sample3", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.FILTER.summary", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.count", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.qual", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.FILTER.summary", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.count", + "reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.qual", + "reports/vcftools/strelka", + "reports/vcftools/strelka/sample3", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.FILTER.summary", + 
"reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.count", + "reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.qual", + "reports/vcftools/strelka/sample4_vs_sample3", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count", + "reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual", + "variant_calling", + "variant_calling/manta", + "variant_calling/manta/sample3", + "variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz", + "variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz.tbi", + "variant_calling/manta/sample4_vs_sample3", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz.tbi", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz", + "variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz.tbi", + "variant_calling/strelka", + "variant_calling/strelka/sample3", + "variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz", + "variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz.tbi", + "variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz", + "variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz.tbi", + "variant_calling/strelka/sample4_vs_sample3", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz", + 
"variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz.tbi", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz", + "variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,336d786b273c4d4e714d51c44207ff56", + "mosdepth-cumcoverage-dist-id.txt:md5,0cc87c596a0fc30ebb676c4587e986de", + "mosdepth_perchrom.txt:md5,336d786b273c4d4e714d51c44207ff56", + "multiqc_citations.txt:md5,d40980f61eb64026d58102841b7f3860", + "samtools-stats-dp.txt:md5,e0a8d8867064083908a8ca1ea782d7ac", + "samtools_alignment_plot.txt:md5,f4b1a7cef760291172144a8614b4a1cd", + "sample3.manta.diploid_sv.bcftools_stats.txt:md5,36a838390faba81e3eabf5ac8a093a4a", + "sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt:md5,f00cf810d34ef7e5c7980f7039bb4446", + "sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt:md5,7af2ea2e84154ddf2a483b1bd1f0646c", + "sample3.strelka.variants.bcftools_stats.txt:md5,6d4d032ba146941cb226765aaed9d67f", + "sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt:md5,62c6123f6494c3cdbd42dc7230e757b3", + "sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt:md5,8404ea88658fbc41d447ba20bf46dd0a", + "sample3.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample3.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample3.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample3.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample3.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample4.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample4.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample4.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample4.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + 
"sample4.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample3.manta.diploid_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample3.manta.diploid_sv.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + "sample4_vs_sample3.manta.diploid_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample4_vs_sample3.manta.diploid_sv.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + "sample4_vs_sample3.manta.somatic_sv.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample4_vs_sample3.manta.somatic_sv.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f", + "sample3.strelka.variants.FILTER.summary:md5,fef8aeadd3b0f3b8c040c0da03bf1cbd", + "sample3.strelka.variants.TsTv.count:md5,c5b7a8eda2526d899098439ae4c06a49", + "sample4_vs_sample3.strelka.somatic_indels.FILTER.summary:md5,30a45e2bc87f40c89388032cbf75ec65", + "sample4_vs_sample3.strelka.somatic_indels.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f", + "sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary:md5,4fc17fa5625b4d1dcc5d791b1eb22d85", + "sample4_vs_sample3.strelka.somatic_snvs.TsTv.count:md5,fc7af1f534890c4ad3025588b3af62ae" + ], + "No BAM files", + "No CRAM files", + [ + "sample3.manta.diploid_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample4_vs_sample3.manta.diploid_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample4_vs_sample3.manta.somatic_sv.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample3.strelka.genome.vcf.gz:md5,aac21bf1449d3c7361c14054b08f8ed1", + "sample3.strelka.variants.vcf.gz:md5,dbea8798de92494c7a1fbf686c363696", + "sample4_vs_sample3.strelka.somatic_indels.vcf.gz:md5,6b8a01b52186fcaf3ec74e5bf1b5426", + "sample4_vs_sample3.strelka.somatic_snvs.vcf.gz:md5,866236c526138e4e08fdc2fa87b08417" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:51:45.217803492" + } +} diff --git a/tests/variant_calling_tiddit.nf.test b/tests/variant_calling_tiddit.nf.test new file mode 
100644 index 0000000000..9d4d08951b --- /dev/null +++ b/tests/variant_calling_tiddit.nf.test @@ -0,0 +1,48 @@ +def projectDir = new File('.').absolutePath +def modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + + def test_scenario = [ + [ + name: "-profile test --tools tiddit germline", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_germline.csv", + step: "variant_calling", + tools: 'tiddit' + ] + ], + [ + name: "-profile test --tools tiddit somatic", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_somatic.csv", + step: "variant_calling", + tools: 'tiddit' + ] + ], + [ + name: "-profile test --tools tiddit tumoronly", + params: [ + fasta: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', + fasta_fai: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', + intervals: modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', + input: "${projectDir}/tests/csv/3.0/recalibrated_tumoronly.csv", + step: "variant_calling", + tools: 'tiddit' + ] + ] + ] + + test_scenario.each { scenario -> + test(scenario.name, UTILS.getTest(scenario)) + } +} diff --git a/tests/variant_calling_tiddit.nf.test.snap 
b/tests/variant_calling_tiddit.nf.test.snap new file mode 100644 index 0000000000..a2740de990 --- /dev/null +++ b/tests/variant_calling_tiddit.nf.test.snap @@ -0,0 +1,473 @@ +{ + "-profile test --tools tiddit germline": { + "content": [ + 12, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIP_TIDDIT_SV": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TIDDIT_SV": { + "tiddit": "3.6.1" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/tiddit", + "reports/bcftools/tiddit/sample1", + "reports/bcftools/tiddit/sample1/sample1.tiddit.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample1", + "reports/mosdepth/sample1/sample1.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample1/sample1.recal.mosdepth.summary.txt", + "reports/mosdepth/sample1/sample1.recal.regions.bed.gz", + 
"reports/mosdepth/sample1/sample1.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample1", + "reports/samtools/sample1/sample1.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/tiddit", + "reports/vcftools/tiddit/sample1", + "reports/vcftools/tiddit/sample1/sample1.tiddit.FILTER.summary", + "reports/vcftools/tiddit/sample1/sample1.tiddit.TsTv.count", + "reports/vcftools/tiddit/sample1/sample1.tiddit.TsTv.qual", + "variant_calling", + "variant_calling/tiddit", + "variant_calling/tiddit/sample1", + "variant_calling/tiddit/sample1/sample1.tiddit.ploidies.tab", + "variant_calling/tiddit/sample1/sample1.tiddit.vcf.gz", + "variant_calling/tiddit/sample1/sample1.tiddit.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,49fd828d21d79a5c9430cdb30d7f0126", + "mosdepth-cumcoverage-dist-id.txt:md5,edd8dfc8023e629e476e01c3a1448d30", + "mosdepth_perchrom.txt:md5,49fd828d21d79a5c9430cdb30d7f0126", + "multiqc_citations.txt:md5,e49571d90d78a0d1bc919b82d7dc48c6", + "samtools-stats-dp.txt:md5,41d36c1a8413d565788a1a364b467606", + "samtools_alignment_plot.txt:md5,35f8fd2a557568b2237193f46afbab5c", + "sample1.tiddit.bcftools_stats.txt:md5,ee25406b8d3ed73eaa8a66972c805c1f", + "sample1.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample1.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample1.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample1.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample1.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample1.tiddit.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample1.tiddit.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a" + ], + "No BAM files", + "No CRAM files", + [ + "sample1.tiddit.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:54:42.930330768" + }, + 
"-profile test --tools tiddit tumoronly": { + "content": [ + 12, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + "SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIP_TIDDIT_SV": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TIDDIT_SV": { + "tiddit": "3.6.1" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + 
"multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/tiddit", + "reports/bcftools/tiddit/sample2", + "reports/bcftools/tiddit/sample2/sample2.tiddit.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample2", + "reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz", + "reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample2", + "reports/samtools/sample2/sample2.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/tiddit", + 
"reports/vcftools/tiddit/sample2", + "reports/vcftools/tiddit/sample2/sample2.tiddit.FILTER.summary", + "reports/vcftools/tiddit/sample2/sample2.tiddit.TsTv.count", + "reports/vcftools/tiddit/sample2/sample2.tiddit.TsTv.qual", + "variant_calling", + "variant_calling/tiddit", + "variant_calling/tiddit/sample2", + "variant_calling/tiddit/sample2/sample2.tiddit.ploidies.tab", + "variant_calling/tiddit/sample2/sample2.tiddit.vcf.gz", + "variant_calling/tiddit/sample2/sample2.tiddit.vcf.gz.tbi" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,ff38f1e35ce12244c751921db673b23c", + "mosdepth-cumcoverage-dist-id.txt:md5,8398d1127a10d0c002831ddedfb9713b", + "mosdepth_perchrom.txt:md5,ff38f1e35ce12244c751921db673b23c", + "multiqc_citations.txt:md5,e49571d90d78a0d1bc919b82d7dc48c6", + "samtools-stats-dp.txt:md5,6618ece77181051a58275f504f67ea5b", + "samtools_alignment_plot.txt:md5,6136e5e1d072f166f280fb79424c392f", + "sample2.tiddit.bcftools_stats.txt:md5,82343c0b28dace889f164bbb256a8461", + "sample2.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample2.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample2.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample2.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample2.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample2.tiddit.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample2.tiddit.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a" + ], + "No BAM files", + "No CRAM files", + [ + "sample2.tiddit.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-30T23:57:29.483541412" + }, + "-profile test --tools tiddit somatic": { + "content": [ + 23, + { + "BCFTOOLS_STATS": { + "bcftools": 1.21 + }, + "CREATE_INTERVALS_BED": { + "gawk": "5.3.0" + }, + "MOSDEPTH": { + "mosdepth": "0.3.10" + }, + 
"SAMTOOLS_STATS": { + "samtools": 1.21 + }, + "SVDB_MERGE": { + "bcftools": 1.21, + "svdb": "2.8.2" + }, + "TABIX_BGZIPTABIX_INTERVAL_COMBINED": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIPTABIX_INTERVAL_SPLIT": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TABIX_BGZIP_TIDDIT_SV": { + "bgzip": "1.21", + "tabix": "1.21" + }, + "TIDDIT_SV": { + "tiddit": "3.6.1" + }, + "VCFTOOLS_TSTV_COUNT": { + "vcftools": "0.1.16" + } + }, + [ + "csv", + "csv/variantcalled.csv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/vcftools_tstv_by_count.txt", + "multiqc/multiqc_data/vcftools_tstv_by_qual.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/vcftools_tstv_by_count.pdf", + 
"multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/vcftools_tstv_by_count.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/vcftools_tstv_by_count.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_sarek_software_mqc_versions.yml", + "reference", + "reports", + "reports/bcftools", + "reports/bcftools/tiddit", + "reports/bcftools/tiddit/sample3", + "reports/bcftools/tiddit/sample3/sample3.tiddit.bcftools_stats.txt", + "reports/bcftools/tiddit/sample4_vs_sample3", + "reports/bcftools/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.bcftools_stats.txt", + "reports/mosdepth", + "reports/mosdepth/sample3", + "reports/mosdepth/sample3/sample3.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.region.dist.txt", + "reports/mosdepth/sample3/sample3.recal.mosdepth.summary.txt", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz", + "reports/mosdepth/sample3/sample3.recal.regions.bed.gz.csi", + "reports/mosdepth/sample4", + "reports/mosdepth/sample4/sample4.recal.mosdepth.global.dist.txt", + "reports/mosdepth/sample4/sample4.recal.mosdepth.region.dist.txt", + 
"reports/mosdepth/sample4/sample4.recal.mosdepth.summary.txt", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz", + "reports/mosdepth/sample4/sample4.recal.regions.bed.gz.csi", + "reports/samtools", + "reports/samtools/sample3", + "reports/samtools/sample3/sample3.recal.cram.stats", + "reports/samtools/sample4", + "reports/samtools/sample4/sample4.recal.cram.stats", + "reports/vcftools", + "reports/vcftools/tiddit", + "reports/vcftools/tiddit/sample3", + "reports/vcftools/tiddit/sample3/sample3.tiddit.FILTER.summary", + "reports/vcftools/tiddit/sample3/sample3.tiddit.TsTv.count", + "reports/vcftools/tiddit/sample3/sample3.tiddit.TsTv.qual", + "reports/vcftools/tiddit/sample4_vs_sample3", + "reports/vcftools/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.FILTER.summary", + "reports/vcftools/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.TsTv.count", + "reports/vcftools/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.TsTv.qual", + "variant_calling", + "variant_calling/tiddit", + "variant_calling/tiddit/sample3", + "variant_calling/tiddit/sample3/sample3.tiddit.ploidies.tab", + "variant_calling/tiddit/sample3/sample3.tiddit.vcf.gz", + "variant_calling/tiddit/sample3/sample3.tiddit.vcf.gz.tbi", + "variant_calling/tiddit/sample4_vs_sample3", + "variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.normal.vcf.gz", + "variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.normal.vcf.gz.tbi", + "variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.ploidies.tab", + "variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.tumor.vcf.gz", + "variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.tumor.vcf.gz.tbi", + "variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.vcf.gz" + ], + [ + "mosdepth-coverage-per-contig-single.txt:md5,336d786b273c4d4e714d51c44207ff56", + "mosdepth-cumcoverage-dist-id.txt:md5,0cc87c596a0fc30ebb676c4587e986de", 
+ "mosdepth_perchrom.txt:md5,336d786b273c4d4e714d51c44207ff56", + "multiqc_citations.txt:md5,e49571d90d78a0d1bc919b82d7dc48c6", + "samtools-stats-dp.txt:md5,e0a8d8867064083908a8ca1ea782d7ac", + "samtools_alignment_plot.txt:md5,f4b1a7cef760291172144a8614b4a1cd", + "sample3.tiddit.bcftools_stats.txt:md5,b8a60370884c8f2c94baa7d3e859492f", + "sample4_vs_sample3.tiddit_sv_merge.bcftools_stats.txt:md5,82f123b157211ac9b8d3b145bbea2147", + "sample3.recal.mosdepth.global.dist.txt:md5,d9a4dd6429560b2b647da346050766c5", + "sample3.recal.mosdepth.region.dist.txt:md5,1f3dab381958e08eb00f7c5e1135f677", + "sample3.recal.mosdepth.summary.txt:md5,d7676e7c1de851b0ee5185d21096123b", + "sample3.recal.regions.bed.gz:md5,6edeb8f7041a4403cb73651744b5bc82", + "sample3.recal.regions.bed.gz.csi:md5,5fc6f880df27ca754ab229f0ccad2aea", + "sample4.recal.mosdepth.global.dist.txt:md5,53f9ae9ab5002ffba340fa8cef7d70e4", + "sample4.recal.mosdepth.region.dist.txt:md5,17600d21ac453506c52249cf435ad8ea", + "sample4.recal.mosdepth.summary.txt:md5,7141030385af1f653718c9e0c9a5be80", + "sample4.recal.regions.bed.gz:md5,c680c5d75f0cea068e3f917f4cf9bf52", + "sample4.recal.regions.bed.gz.csi:md5,68b7a9a98053b1122bdca68a1e1c87dd", + "sample3.tiddit.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample3.tiddit.TsTv.count:md5,fa27f678965b7cba6a92efcd039f802a", + "sample4_vs_sample3.tiddit_sv_merge.FILTER.summary:md5,1ce42d34e4ae919afb519efc99146423", + "sample4_vs_sample3.tiddit_sv_merge.TsTv.count:md5,8dcfdbcaac118df1d5ad407dd2af699f" + ], + "No BAM files", + "No CRAM files", + [ + "sample3.tiddit.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample4_vs_sample3.tiddit.normal.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample4_vs_sample3.tiddit.tumor.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample4_vs_sample3.tiddit_sv_merge.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "No warnings" + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.7" + }, + "timestamp": 
"2025-09-30T23:56:22.640069514" + } +} diff --git a/tower.yml b/tower.yml new file mode 100644 index 0000000000..5e679d223b --- /dev/null +++ b/tower.yml @@ -0,0 +1,60 @@ +# Globs are matched against published output paths, e.g. variant_calling/<tool>/<sample>/ and reports/bcftools/<caller>/<sample>/ +reports: + multiqc_report.html: + display: "MultiQC HTML report" + "**/umi/*_umi_histogram.txt": + display: "All UMI histograms" + "**/reports/fastp/*/*_fastp.html": + display: "FASTP report" + "**/reports/mosdepth/*/*.mosdepth.summary.txt": + display: "All samples summary of mean depths per chromosome and within specified regions per chromosome" + "**/csv/*.csv": + display: "All CSV files to restart nf-core/sarek at a different step" + "**/variant_calling/ascat/*/*.tumour.ASPCF.png": + display: "ASCAT: All allele-specific copy number segmentation images" + "**/variant_calling/ascat/*/*.before_correction_Tumour.*.png": + display: "ASCAT: All samples logR and BAF values" + "**/variant_calling/ascat/*/*.after_correction_GC_Tumour.*.png": + display: "ASCAT: All samples GC and RT corrected logR and BAF values" + "**/variant_calling/ascat/*/*.tumour.sunrise.png": + display: "ASCAT: Range of ploidy and tumor percentage values" + "**/variant_calling/ascat/*/*.metrics.txt": + display: "ASCAT: Multiple metrics information" + "**/variant_calling/ascat/*/*.cnvs.txt": + display: "ASCAT: CNVS information" + "**/variant_calling/ascat/*/*.purityploidy.txt": + display: "ASCAT: Purity and ploidy information" + "**/variant_calling/ascat/*/*.segments.txt": + display: "ASCAT: copy number segments information" + "**/variant_calling/ascat/*/*_tumourBAF.txt": + display: "ASCAT: beta allele frequencies" + "**/variant_calling/ascat/*/*.tumour_*LogR.txt": + display: "ASCAT: total copy number on a logarithmic scale" + "**/variant_calling/cnvkit/*/*-diagram.pdf": + display: "CNVKIT: Copy numbers or segments on chromosomes" + "**/variant_calling/cnvkit/**-scatter.png": + display: "CNVKIT: Bin-level log2 coverages and segmentation calls" + "**/variant_calling/controlfreec/*/config.txt": + display: "Control-FREEC: Configuration file used
to run Control-FREEC" + "**/variant_calling/controlfreec/*/*_BAF.png": + display: "Control-FREEC: BAF plot" + "**/variant_calling/controlfreec/*/*_ratio.log2.png": + display: "Control-FREEC: log2 ratio plot" + "**/variant_calling/controlfreec/*/*_ratio.png": + display: "Control-FREEC: ratio plot" + "**/variant_calling/controlfreec/*/*.circos.txt": + display: "Control-FREEC: translated output to the Circos format" + "**/variant_calling/controlfreec/*/*.p.value.txt": + display: "Control-FREEC: CNV file containing p_values for each call" + "**/variant_calling/controlfreec/*/*_BAF.txt": + display: "Control-FREEC: file with beta allele frequencies for each possibly heterozygous SNP position" + "**/variant_calling/controlfreec/*/*_info.txt": + display: "Control-FREEC: parsable file with information about FREEC run" + "**/reports/bcftools/*/*/*.bcftools_stats.txt": + display: "All samples raw statistics" + "**/reports/snpeff/*/*/*_snpEff.html": + display: "Statistics and plots for the SnpEff run" + "**/reports/snpeff/*/*/*_snpEff.genes.txt": + display: "TXT (tab separated) summary counts for variants affecting each transcript and gene" + "**/reports/EnsemblVEP/*/*/*_VEP.summary.html": + display: "Summary of the VEP run" diff --git a/workflows/sarek.nf b/workflows/sarek.nf deleted file mode 100644 index 0341043a39..0000000000 --- a/workflows/sarek.nf +++ /dev/null @@ -1,682 +0,0 @@ -/* -======================================================================================== - VALIDATE INPUTS -======================================================================================== -*/ - -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) - -// Validate input parameters -WorkflowSarek.initialise(params, log) - -// Check input path parameters to see if they exist -def checkPathParamList = [ - params.ac_loci, - params.ac_loci_gc, - params.bwa, - params.cadd_indels, - params.cadd_indels_tbi, - params.cadd_wg_snvs, - params.cadd_wg_snvs_tbi, - params.chr_dir, -
params.chr_length, - params.dbsnp, - params.dbsnp_tbi, - params.dict, - params.fasta, - params.fasta_fai, - params.germline_resource, - params.germline_resource_tbi, - params.input, - params.intervals, - params.known_indels, - params.known_indels_tbi, - params.mappability, - params.multiqc_config, - params.pon, - params.pon_tbi, - params.snpeff_cache, - //params.target_bed, - params.vep_cache -] - -for (param in checkPathParamList) if (param) file(param, checkIfExists: true) - -// Check mandatory parameters -if (params.input) csv_file = file(params.input) -else { - log.warn "No samplesheet specified, attempting to restart from csv files present in ${params.outdir}" - switch (params.step) { - case 'mapping': exit 1, "Can't start with step $params.step without samplesheet" - case 'prepare_recalibration': csv_file = file("${params.outdir}/preprocessing/csv/markduplicates_no_table.csv", checkIfExists: true); break - case 'recalibrate': csv_file = file("${params.outdir}/preprocessing/csv/markduplicates.csv", checkIfExists: true); break - case 'variant_calling': csv_file = file("${params.outdir}/preprocessing/csv/recalibrated.csv", checkIfExists: true); break - // case 'controlfreec': csv_file = file("${params.outdir}/variant_calling/csv/control-freec_mpileup.csv", checkIfExists: true); break - case 'annotate': csv_file = file("${params.outdir}/variant_calling/csv/recalibrated.csv", checkIfExists: true); break - default: exit 1, "Unknown step $params.step" - } -} - -input_sample = extract_csv(csv_file) - -def save_bam_mapped = params.skip_markduplicates ? true : params.save_bam_mapped ? 
true : false - -if(params.wes){ - if(!params.intervals.endsWith("bed")){ - exit 1, "Target file must be in BED format" - } -}else{ - if(!params.intervals.endsWith("bed") && !params.intervals.endsWith("interval_list")){ - exit 1, "Interval file must end with .bed or .interval_list" - } -} - -// Save AWS IGenomes file containing annotation version -def anno_readme = params.genomes[params.genome]?.readme -if (anno_readme && file(anno_readme).exists()) { - file("${params.outdir}/genome/").mkdirs() - file(anno_readme).copyTo("${params.outdir}/genome/") -} - -/* -======================================================================================== - IMPORT LOCAL MODULES/SUBWORKFLOWS -======================================================================================== -*/ - -// Initialize file channels based on params, defined in the params.genomes[params.genome] scope -chr_dir = params.chr_dir ? Channel.fromPath(params.chr_dir).collect() : [] -chr_length = params.chr_length ? Channel.fromPath(params.chr_length).collect() : [] -dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.empty() -fasta = params.fasta ? Channel.fromPath(params.fasta).collect() : [] -germline_resource = params.germline_resource ? Channel.fromPath(params.germline_resource).collect() : [] -known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.empty() -loci = params.ac_loci ? Channel.fromPath(params.ac_loci).collect() : [] -loci_gc = params.ac_loci_gc ? Channel.fromPath(params.ac_loci_gc).collect() : [] -mappability = params.mappability ? 
Channel.fromPath(params.mappability).collect() : [] - -// Initialize value channels based on params, defined in the params.genomes[params.genome] scope -snpeff_db = params.snpeff_db ?: Channel.empty() -vep_cache_version = params.vep_cache_version ?: Channel.empty() -vep_genome = params.vep_genome ?: Channel.empty() -vep_species = params.vep_species ?: Channel.empty() - -// Initialize files channels based on params, not defined within the params.genomes[params.genome] scope -cadd_indels = params.cadd_indels ? Channel.fromPath(params.cadd_indels).collect() : [] -cadd_indels_tbi = params.cadd_indels_tbi ? Channel.fromPath(params.cadd_indels_tbi).collect() : [] -cadd_wg_snvs = params.cadd_wg_snvs ? Channel.fromPath(params.cadd_wg_snvs).collect() : [] -cadd_wg_snvs_tbi = params.cadd_wg_snvs_tbi ? Channel.fromPath(params.cadd_wg_snvs_tbi).collect() : [] -pon = params.pon ? Channel.fromPath(params.pon).collect() : [] -snpeff_cache = params.snpeff_cache ? Channel.fromPath(params.snpeff_cache).collect() : [] -//target_bed = params.target_bed ? Channel.fromPath(params.target_bed).collect() : [] -vep_cache = params.vep_cache ? Channel.fromPath(params.vep_cache).collect() : [] - -// Initialize value channels based on params, not defined within the params.genomes[params.genome] scope -umi_read_structure = params.umi_read_structure ? 
"${params.umi_read_structure} ${params.umi_read_structure}": Channel.empty() - - -// SUBWORKFLOWS: Consisting of a mix of local and nf-core/modules - -// Create samplesheets to restart from different steps -include { MAPPING_CSV } from '../subworkflows/local/mapping_csv' -include { MARKDUPLICATES_CSV } from '../subworkflows/local/markduplicates_csv' -include { PREPARE_RECALIBRATION_CSV } from '../subworkflows/local/prepare_recalibration_csv' -include { RECALIBRATE_CSV } from '../subworkflows/local/recalibrate_csv' - -// Build indices if needed -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' - -// Convert BAM files to FASTQ files -include { ALIGNMENT_TO_FASTQ } from '../subworkflows/local/bam2fastq' - -// Map input reads to reference genome -include { GATK4_MAPPING } from '../subworkflows/nf-core/gatk4_mapping/main' - -// Mark duplicates (+QC) + convert to CRAM -include { MARKDUPLICATES } from '../subworkflows/nf-core/markduplicates' - -// Create recalibration tables -include { PREPARE_RECALIBRATION } from '../subworkflows/nf-core/prepare_recalibration' - -// Create recalibrated cram files to use for variant calling (+QC) -include { RECALIBRATE } from '../subworkflows/nf-core/recalibrate' - -// Variant calling on a single normal sample -include { GERMLINE_VARIANT_CALLING } from '../subworkflows/local/germline_variant_calling' - -// Variant calling on a single tumor sample -include { TUMOR_ONLY_VARIANT_CALLING} from '../subworkflows/local/tumor_variant_calling' - -// Variant calling on tumor/normal pair -include { PAIR_VARIANT_CALLING } from '../subworkflows/local/pair_variant_calling' - -// Annotation -include { ANNOTATE } from '../subworkflows/local/annotate' addParams( - annotation_cache: params.annotation_cache -) - -/* -======================================================================================== - IMPORT NF-CORE MODULES/SUBWORKFLOWS -======================================================================================== -*/ - 
-// Config files -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yaml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty() - -// -// SUBWORKFLOWS -// - -include { FASTQC_TRIMGALORE } from '../subworkflows/nf-core/fastqc_trimgalore' - -// Create umi consensus bams from fastq -include { CREATE_UMI_CONSENSUS } from '../subworkflows/nf-core/fgbio_create_umi_consensus/main' - -// -// MODULES: Installed directly from nf-core/modules -// - -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' -include { MULTIQC } from '../modules/nf-core/modules/multiqc/main' - -def multiqc_report = [] - -workflow SAREK { - - ch_versions = Channel.empty() - qc_reports = Channel.empty() - - // Build indices if needed - PREPARE_GENOME( - dbsnp, - fasta, - params.fasta_fai, - germline_resource, - known_indels, - pon, - params.tools, - params.step) - - // Gather built indices or get them from the params - bwa = params.fasta ? params.bwa ? Channel.fromPath(params.bwa).collect() : PREPARE_GENOME.out.bwa : [] - dict = params.fasta ? params.dict ? Channel.fromPath(params.dict).collect() : PREPARE_GENOME.out.dict : [] - fasta_fai = params.fasta ? params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : PREPARE_GENOME.out.fasta_fai : [] - dbsnp_tbi = params.dbsnp ? params.dbsnp_tbi ? Channel.fromPath(params.dbsnp_tbi).collect() : PREPARE_GENOME.out.dbsnp_tbi : Channel.empty() - germline_resource_tbi = params.germline_resource ? params.germline_resource_tbi ? Channel.fromPath(params.germline_resource_tbi).collect() : PREPARE_GENOME.out.germline_resource_tbi : [] - known_indels_tbi = params.known_indels ? params.known_indels_tbi ? Channel.fromPath(params.known_indels_tbi).collect() : PREPARE_GENOME.out.known_indels_tbi : Channel.empty() - pon_tbi = params.pon ? params.pon_tbi ? 
Channel.fromPath(params.pon_tbi).collect() : PREPARE_GENOME.out.pon_tbi : [] - msisensorpro_scan = PREPARE_GENOME.out.msisensorpro_scan - intervals_bed_combined = (params.intervals && params.wes) ? Channel.fromPath(params.intervals).collect() : [] - - //TODO @Rike, is this working for you? Now it is, fixed a bug in prepare_genome.nf after chasing smoke for a while - // known_sites is made by grouping both the dbsnp and the known indels ressources - // Which can either or both be optional - // Actually BQSR has been throughing erros if no sides were provided so it must be at lest one - known_sites = dbsnp.concat(known_indels).collect() - known_sites_tbi = dbsnp_tbi.concat(known_indels_tbi).collect() - - // Intervals for speed up preprocessing/variant calling by spread/gather - intervals = PREPARE_GENOME.out.intervals_bed // multiple interval.bed files, divided by useful intervals for scatter/gather - intervals_bed_gz_tbi = PREPARE_GENOME.out.intervals_bed_gz_tbi// multiple interval.bed.gz/.tbi files, divided by useful intervals for scatter/gather - intervals_bed_combined_gz_tbi = PREPARE_GENOME.out.intervals_combined_bed_gz_tbi.collect() // one file containing all intervals interval.bed.gz/.tbi file - intervals_bed_combined_gz = intervals_bed_combined_gz_tbi.map{ bed, tbi -> [bed]}.collect() // one file containing all intervals interval.bed.gz file - - num_intervals = 0 - intervals.count().map{ num_intervals = it } - - // Get versions from all software used - ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) - - // PREPROCESSING - - bam_mapped = Channel.empty() - bam_mapped_qc = Channel.empty() - bam_recalibrated_qc = Channel.empty() - bam_variant_calling = Channel.empty() - - // STEP 0: QC & TRIM - // `--d fastqc` to skip fastqc - // trim only with `--trim_fastq` - // additional options to be set up - - if (params.step == 'mapping') { - - if(params.is_bam_input){ - ALIGNMENT_TO_FASTQ(input_sample, []) - 
ALIGNMENT_TO_FASTQ.out.reads.set{input_sample_converted} - ch_versions = ch_versions.mix(ALIGNMENT_TO_FASTQ.out.versions) - }else{ - input_sample_converted = input_sample - } - - FASTQC_TRIMGALORE( - input_sample_converted, - ('fastqc' in params.skip_qc), - !(params.trim_fastq)) - - // Get reads after optional trimming (+QC) - reads_input = FASTQC_TRIMGALORE.out.reads - - // Get all qc reports for MultiQC - qc_reports = qc_reports.mix(FASTQC_TRIMGALORE.out.fastqc_zip.collect{it[1]}.ifEmpty([])) - qc_reports = qc_reports.mix(FASTQC_TRIMGALORE.out.trim_log.collect{it[1]}.ifEmpty([])) - qc_reports = qc_reports.mix(FASTQC_TRIMGALORE.out.trim_zip.collect{it[1]}.ifEmpty([])) - - // Get versions from all software used - ch_versions = ch_versions.mix(FASTQC_TRIMGALORE.out.versions) - - //Since read need additional mapping afterwards, I would argue for having the process here - if(params.umi_read_structure){ - CREATE_UMI_CONSENSUS(reads_input, fasta, bwa, umi_read_structure, params.group_by_umi_strategy, params.aligner) - ALIGNMENT_TO_FASTQ( CREATE_UMI_CONSENSUS.out.consensusbam, [] ) - ALIGNMENT_TO_FASTQ.out.reads.set{reads_input} - - ch_versions = ch_versions.mix(CREATE_UMI_CONSENSUS.out.versions) - ch_versions = ch_versions.mix(ALIGNMENT_TO_FASTQ.out.versions) - } - - // STEP 1: MAPPING READS TO REFERENCE GENOME - GATK4_MAPPING( - params.aligner, - bwa, - fasta, - fasta_fai, - reads_input, - params.skip_markduplicates, - save_bam_mapped) - - // Get mapped reads (BAM) with and without index - // without index: always contains mapped_bams, only used if duplicate marking is done - // with Index: Duplicate marking is skipped and/or bams are saved, else empty Channel - bam_mapped = GATK4_MAPPING.out.bam - bam_indexed = GATK4_MAPPING.out.bam_indexed - - // Create CSV to restart from this step - // TODO: How is this handeled if not save_bam is set (no index should be present) - //MAPPING_CSV(bam_indexed, save_bam_mapped, params.skip_markduplicates) - - // Get versions from all 
software used - ch_versions = ch_versions.mix(GATK4_MAPPING.out.versions) - } - - // Comment out till we get the tests to pass - if (params.step == 'prepare_recalibration') { - bam_indexed = Channel.empty() - bam_mapped = Channel.empty() - - if(params.skip_markduplicates){ - bam_indexed = input_sample - }else{ //index will be created down the road from the Markduplicatess - input_sample.map{meta, bam, bai -> - [meta, bam] - }.set{bam_mapped} - } - } - - if (params.step in ['mapping', 'prepare_recalibration']) { - - // STEP 2: Mark duplicates (+QC) + convert to CRAM - MARKDUPLICATES( - bam_mapped, - bam_indexed, - ('markduplicates' in params.use_gatk_spark), - !('markduplicates' in params.skip_qc), - dict, - fasta, - fasta_fai, - params.skip_markduplicates, - ('bamqc' in params.skip_qc), - ('samtools' in params.skip_qc), - ('deeptools' in params.skip_qc), - intervals_bed_combined) - - cram_markduplicates = MARKDUPLICATES.out.cram - - // Create CSV to restart from this step - MARKDUPLICATES_CSV(cram_markduplicates) - - qc_reports = qc_reports.mix(MARKDUPLICATES.out.qc.collect{it[1]}.ifEmpty([])) - - ch_versions = ch_versions.mix(MARKDUPLICATES.out.versions) - - // STEP 3: Create recalibration tables - if(!params.skip_bqsr){ - - PREPARE_RECALIBRATION( - cram_markduplicates, - ('bqsr' in params.use_gatk_spark), - dict, - fasta, - fasta_fai, - intervals, - num_intervals, - known_sites, - known_sites_tbi, - params.no_intervals) - - table_bqsr = PREPARE_RECALIBRATION.out.table_bqsr - PREPARE_RECALIBRATION_CSV(table_bqsr) - - cram_applybqsr = cram_markduplicates.join(table_bqsr) - - ch_versions = ch_versions.mix(PREPARE_RECALIBRATION.out.versions) - } - } - - if (params.step == 'recalibrate') bam_applybqsr = input_sample - - if (params.step in ['mapping', 'prepare_recalibration', 'recalibrate']) { - - if(!params.skip_bqsr){ - // STEP 4: RECALIBRATING - RECALIBRATE( - ('bqsr' in params.use_gatk_spark), - ('bamqc' in params.skip_qc), - ('samtools' in params.skip_qc), - 
cram_applybqsr, - dict, - fasta, - fasta_fai, - intervals, - num_intervals, - params.no_intervals, - intervals_bed_combined - ) - - cram_recalibrated = RECALIBRATE.out.cram - cram_recalibrated_qc = RECALIBRATE.out.qc - - RECALIBRATE_CSV(cram_recalibrated) - - qc_reports = qc_reports.mix(cram_recalibrated_qc.collect{it[1]}.ifEmpty([])) - cram_variant_calling = cram_recalibrated - - ch_versions = ch_versions.mix(RECALIBRATE.out.versions) - - }else{ - cram_variant_calling = cram_markduplicates - } - - } - - if (params.step in 'variant_calling') cram_variant_calling = input_sample - - if (params.tools) { - - vcf_to_annotate = Channel.empty() - if (params.step in 'annotate') cram_variant_calling = Channel.empty() - - // - // Logic to separate germline samples, tumor samples with no matched normal, and combine tumor-normal pairs - // - cram_variantcalling = Channel.empty() - cram_variant_calling.branch{ - normal: it[0].status == 0 - tumor: it[0].status == 1 - }.set{cram_variantcalling} - - // All Germline samples - cram_variant_calling_normal_cross = Channel.empty() - cram_variantcalling.normal.map{ meta, cram, crai -> - [meta.patient, meta, cram, crai] - }.set{cram_variant_calling_normal_cross} - - // All tumor samples - cram_variant_calling_tumor_cross = Channel.empty() - cram_variantcalling.tumor.map{ meta, cram, crai -> - [meta.patient, meta, cram, crai] - }.set{cram_variant_calling_tumor_cross} - - //Tumor only samples - // 1. Group together all tumor samples by patient ID [patient1, [meta1, meta2], [cram1,crai1, cram2, crai2]] - cram_variant_calling_tumor_grouped = Channel.empty() - - //Downside: this only works by waiting for all tumor samples to finish preprocessing, since no group size is provided - cram_variant_calling_tumor_cross.groupTuple().set{ cram_variant_calling_tumor_grouped } - - // 2. Join with normal samples, in each channel there is one key per patient now. 
Patients without matched normal end up with: [patient1, [meta1, meta2], [cram1,crai1, cram2, crai2], null] - cram_variant_calling_tumor_joined = Channel.empty() - cram_variant_calling_tumor_grouped.join(cram_variant_calling_normal_cross, remainder: true).set{cram_variant_calling_tumor_joined} - - // 3. Filter out entries with last entry null - cram_variant_calling_tumor_filtered = Channel.empty() - cram_variant_calling_tumor_joined.filter{ it -> !(it.last())}.set{cram_variant_calling_tumor_filtered} - - // 4. Transpose [patient1, [meta1, meta2], [cram1,crai1, cram2, crai2]] back to [patient1, meta1, [cram1,crai1], null] [patient1, meta2, [cram2,crai2], null] - // and remove patient ID field & null value for further processing [meta1, [cram1,crai1]] [meta2, [cram2,crai2]] - cram_variant_calling_tumor_only = Channel.empty() - cram_variant_calling_tumor_filtered.transpose().map{ it -> [it[1], it[2], it[3]]}.set{cram_variant_calling_tumor_only} - - // Tumor - normal pairs - // Use cross to combine normal with all tumor samples, i.e. 
multi tumor samples from recurrences - cram_variant_calling_pair = cram_variant_calling_normal_cross.cross(cram_variant_calling_tumor_cross) - .map { normal, tumor -> - def meta = [:] - meta.patient = normal[0] - meta.normal_id = normal[1].sample - meta.tumor_id = tumor[1].sample - meta.gender = normal[1].gender - meta.id = "${meta.tumor_id}_vs_${meta.normal_id}".toString() - - [meta, normal[2], normal[3], tumor[2], tumor[3]] - } - - // GERMLINE VARIANT CALLING - GERMLINE_VARIANT_CALLING( - params.tools, - cram_variantcalling.normal, - dbsnp, - dbsnp_tbi, - dict, - fasta, - fasta_fai, - intervals, - intervals_bed_gz_tbi, - intervals_bed_combined_gz_tbi, - intervals_bed_combined_gz, - num_intervals, - params.no_intervals, - params.joint_germline - ) - - vcf_to_annotate = vcf_to_annotate.mix(GERMLINE_VARIANT_CALLING.out.deepvariant_vcf) - vcf_to_annotate = vcf_to_annotate.mix(GERMLINE_VARIANT_CALLING.out.freebayes_vcf) - vcf_to_annotate = vcf_to_annotate.mix(GERMLINE_VARIANT_CALLING.out.haplotypecaller_gvcf) - vcf_to_annotate = vcf_to_annotate.mix(GERMLINE_VARIANT_CALLING.out.manta_vcf) - vcf_to_annotate = vcf_to_annotate.mix(GERMLINE_VARIANT_CALLING.out.strelka_vcf) - ch_versions = ch_versions.mix(GERMLINE_VARIANT_CALLING.out.versions) - - // TUMOR ONLY VARIANT CALLING - TUMOR_ONLY_VARIANT_CALLING( - params.tools, - cram_variant_calling_tumor_only, - dbsnp, - dbsnp_tbi, - dict, - fasta, - fasta_fai, - intervals, - intervals_bed_gz_tbi, - intervals_bed_combined_gz_tbi, - intervals_bed_combined_gz, - num_intervals, - params.no_intervals, - germline_resource, - germline_resource_tbi, - pon, - pon_tbi - ) - vcf_to_annotate = vcf_to_annotate.mix(TUMOR_ONLY_VARIANT_CALLING.out.freebayes_vcf) - vcf_to_annotate = vcf_to_annotate.mix(TUMOR_ONLY_VARIANT_CALLING.out.mutect2_vcf) - vcf_to_annotate = vcf_to_annotate.mix(TUMOR_ONLY_VARIANT_CALLING.out.manta_vcf) - vcf_to_annotate = vcf_to_annotate.mix(TUMOR_ONLY_VARIANT_CALLING.out.strelka_vcf) - ch_versions = 
ch_versions.mix(TUMOR_ONLY_VARIANT_CALLING.out.versions) - - // PAIR VARIANT CALLING - PAIR_VARIANT_CALLING( - params.tools, - cram_variant_calling_pair, - dbsnp, - dbsnp_tbi, - dict, - fasta, - fasta_fai, - intervals, - intervals_bed_gz_tbi, - intervals_bed_combined_gz_tbi, - intervals_bed_combined_gz, - num_intervals, - params.no_intervals, - msisensorpro_scan, - germline_resource, - germline_resource_tbi, - pon, - pon_tbi) - - vcf_to_annotate = vcf_to_annotate.mix(PAIR_VARIANT_CALLING.out.mutect2_vcf) - vcf_to_annotate = vcf_to_annotate.mix(PAIR_VARIANT_CALLING.out.manta_vcf) - vcf_to_annotate = vcf_to_annotate.mix(PAIR_VARIANT_CALLING.out.strelka_vcf) - ch_versions = ch_versions.mix(PAIR_VARIANT_CALLING.out.versions) - - - // ANNOTATE - if (params.step == 'annotate') vcf_to_annotate = input_sample - - if (params.tools.contains('merge') || params.tools.contains('snpeff') || params.tools.contains('vep')) { - - ANNOTATE( - vcf_to_annotate, - params.tools, - snpeff_db, - snpeff_cache, - vep_genome, - vep_species, - vep_cache_version, - vep_cache) - ch_versions = ch_versions.mix(ANNOTATE.out.versions) - } - } - - ch_version_yaml = Channel.empty() - if (!('versions' in params.skip_qc)) { - CUSTOM_DUMPSOFTWAREVERSIONS(ch_versions.unique().collectFile(name: 'collated_versions.yml')) - ch_version_yaml = CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect() - } - - // workflow_summary = WorkflowSarek.paramsSummaryMultiqc(workflow, summary_params) - // ch_workflow_summary = Channel.value(workflow_summary) - - // ch_multiqc_files = Channel.empty() - // ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_config) - // ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) - // ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - // ch_multiqc_files = ch_multiqc_files.mix(ch_version_yaml) - // ch_multiqc_files = ch_multiqc_files.mix(qc_reports) - - // multiqc_report = Channel.empty() - // if 
(!('multiqc' in params.skip_qc)) { - // MULTIQC(ch_multiqc_files.collect()) - // multiqc_report = MULTIQC.out.report.toList() - - // } -} - -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.summary(workflow, params, log) -} - -// Function to extract information (meta data + file(s)) from csv file(s) - -is_bam_input = false -def extract_csv(csv_file) { - Channel.from(csv_file).splitCsv(header: true) - //Retrieves number of lanes by grouping together by patient and sample and counting how many entries there are for this combination - .map{ row -> - if (!(row.patient && row.sample)) log.warn "Missing or unknown field in csv file header" - [[row.patient.toString(), row.sample.toString()], row] - }.groupTuple() - .map{ meta, rows -> - size = rows.size() - [rows, size] - }.transpose() - .map{ row, numLanes -> //from here do the usual thing for csv parsing - def meta = [:] - - //TODO since it is mandatory: error/warning if not present? - // Meta data to identify samplesheet - // Both patient and sample are mandatory - // Several sample can belong to the same patient - // Sample should be unique for the patient - if (row.patient) meta.patient = row.patient.toString() - if (row.sample) meta.sample = row.sample.toString() - - // If no gender specified, gender is not considered - // gender is only mandatory for somatic CNV - if (row.gender) meta.gender = row.gender.toString() - else meta.gender = "NA" - - // If no status specified, sample is assumed normal - if (row.status) meta.status = row.status.toInteger() - else meta.status = 0 - - // mapping with fastq - if (row.lane && row.fastq_2) { - meta.id = "${row.sample}-${row.lane}".toString() - def fastq_1 = file(row.fastq_1, checkIfExists: true) - def fastq_2 = file(row.fastq_2, checkIfExists: true) - def CN = params.sequencing_center ? 
"CN:${params.sequencing_center}\\t" : '' - def read_group = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:ILLUMINA\"" - meta.numLanes = numLanes.toInteger() - meta.read_group = read_group.toString() - return [meta, [fastq_1, fastq_2]] - // start from BAM - } else if (row.lane && row.bam) { - meta.id = "${row.sample}-${row.lane}".toString() - def bam = file(row.bam, checkIfExists: true) - def CN = params.sequencing_center ? "CN:${params.sequencing_center}\\t" : '' - def read_group = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:ILLUMINA\"" - meta.numLanes = numLanes.toInteger() - meta.read_group = read_group.toString() - is_bam_input = true - return [meta, bam] - // recalibration - } else if (row.table && row.cram) { - meta.id = meta.sample - def cram = file(row.cram, checkIfExists: true) - def crai = file(row.crai, checkIfExists: true) - def table = file(row.table, checkIfExists: true) - return [meta, cram, crai, table] - // recalibration when skipping MarkDuplicates - } else if (row.table && row.bam) { - meta.id = meta.sample - def bam = file(row.bam, checkIfExists: true) - def bai = file(row.bai, checkIfExists: true) - def table = file(row.table, checkIfExists: true) - return [meta, bam, bai, table] - // prepare_recalibration or variant_calling - } else if (row.cram) { - meta.id = meta.sample - def cram = file(row.cram, checkIfExists: true) - def crai = file(row.crai, checkIfExists: true) - return [meta, cram, crai] - // prepare_recalibration when skipping MarkDuplicates - } else if (row.bam) { - meta.id = meta.sample - def bam = file(row.bam, checkIfExists: true) - def bai = file(row.bai, checkIfExists: true) - return [meta, bam, bai] - // annotation - } else if (row.vcf) { - meta.id = meta.sample - def vcf = file(row.vcf, checkIfExists: true) - return [meta, vcf] - } else { - log.warn "Missing or unknown field in csv file header" - } - } -} diff --git a/workflows/sarek/main.nf 
b/workflows/sarek/main.nf new file mode 100644 index 0000000000..f1248e0459 --- /dev/null +++ b/workflows/sarek/main.nf @@ -0,0 +1,715 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from 'plugin/nf-core-utils' +include { methodsDescriptionText } from '../../subworkflows/local/utils_nfcore_sarek_pipeline' + +// Create samplesheets to restart from different steps +include { CHANNEL_VARIANT_CALLING_CREATE_CSV } from '../../subworkflows/local/channel_variant_calling_create_csv' + +// Convert BAM files to FASTQ files +include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_INPUT } from '../../subworkflows/local/bam_convert_samtools' + +// Convert fastq.gz.spring files to fastq.gz files +include { SPRING_DECOMPRESS as SPRING_DECOMPRESS_TO_R1_FQ } from '../../modules/nf-core/spring/decompress' +include { SPRING_DECOMPRESS as SPRING_DECOMPRESS_TO_R2_FQ } from '../../modules/nf-core/spring/decompress' +include { SPRING_DECOMPRESS as SPRING_DECOMPRESS_TO_FQ_PAIR } from '../../modules/nf-core/spring/decompress' + +// Run FASTQC +include { FASTQC } from '../../modules/nf-core/fastqc' + +// QC on CRAM +include { CRAM_SAMPLEQC } from '../../subworkflows/local/cram_sampleqc' + +// Preprocessing +include { FASTQ_PREPROCESS_GATK } from '../../subworkflows/local/fastq_preprocess_gatk' +include { FASTQ_PREPROCESS_PARABRICKS } from '../../subworkflows/local/fastq_preprocess_parabricks' + +// CRAM_TO_BAM conversion +include { SAMTOOLS_CONVERT as CRAM_TO_BAM } from '../../modules/nf-core/samtools/convert' + +// Variant calling on a single normal sample +include { BAM_VARIANT_CALLING_GERMLINE_ALL } from 
'../../subworkflows/local/bam_variant_calling_germline_all' + +// Variant calling on a single tumor sample +include { BAM_VARIANT_CALLING_TUMOR_ONLY_ALL } from '../../subworkflows/local/bam_variant_calling_tumor_only_all' + +// Variant calling on tumor/normal pair +include { BAM_VARIANT_CALLING_SOMATIC_ALL } from '../../subworkflows/local/bam_variant_calling_somatic_all' + +// POST VARIANTCALLING: e.g. merging +include { POST_VARIANTCALLING } from '../../subworkflows/local/post_variantcalling' + +// QC on VCF files +include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../../subworkflows/local/vcf_qc_bcftools_vcftools' + +// Annotation +include { VCF_ANNOTATE_ALL } from '../../subworkflows/local/vcf_annotate_all' + +// MULTIQC +include { MULTIQC } from '../../modules/nf-core/multiqc' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow SAREK { + take: + input_sample + aligner + skip_tools + step + tools + ascat_alleles + ascat_loci + ascat_loci_gc + ascat_loci_rt + bbsplit_index + bcftools_annotations + bcftools_annotations_tbi + bcftools_columns + bcftools_header_lines + cf_chrom_len + chr_files + cnvkit_reference + dbsnp + dbsnp_tbi + dbsnp_vqsr + dict + fasta + fasta_fai + germline_resource + germline_resource_tbi + index_alignment + intervals_and_num_intervals + intervals_bed_combined + intervals_bed_combined_for_variant_calling + intervals_bed_gz_tbi_and_num_intervals + intervals_bed_gz_tbi_combined + intervals_for_preprocessing + known_indels_vqsr + known_sites_indels + known_sites_indels_tbi + known_sites_snps + known_sites_snps_tbi + known_snps_vqsr + mappability + msisensor2_models + msisensorpro_scan + ngscheckmate_bed + pon + pon_tbi + sentieon_dnascope_model + varlociraptor_scenario_germline + varlociraptor_scenario_somatic + varlociraptor_scenario_tumor_only + snpeff_cache + snpeff_db + vep_cache 
+ vep_cache_version + vep_extra_files + vep_fasta + vep_genome + vep_species + snpsift_db // channel: [[databases], [tbis], [vardbs], [fields], [prefixes]] + versions + + main: + // To gather all QC reports for MultiQC + ch_multiqc_files = channel.empty() + multiqc_publish = channel.empty() + multiqc_report = channel.empty() + reports = channel.empty() + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + + // PREPROCESSING + if (step == 'mapping') { + // Figure out if input is bam, fastq, or spring + input_sample_type = input_sample.branch { + bam: it[0].data_type == "bam" + fastq_gz: it[0].data_type == "fastq_gz" + one_fastq_gz_spring: it[0].data_type == "one_fastq_gz_spring" + two_fastq_gz_spring: it[0].data_type == "two_fastq_gz_spring" + } + + // Two fastq.gz-files + fastq_gz = input_sample_type.fastq_gz.map { meta, files -> addReadgroupToMeta(meta, files) } + + // Just one fastq.gz.spring-file with both R1 and R2 + fastq_gz_pair_from_spring = SPRING_DECOMPRESS_TO_FQ_PAIR(input_sample_type.one_fastq_gz_spring, false) + + one_fastq_gz_from_spring = fastq_gz_pair_from_spring.fastq.map { meta, files -> addReadgroupToMeta(meta, files) } + + // Two fastq.gz.spring-files - one for R1 and one for R2 + r1_fastq_gz_from_spring = SPRING_DECOMPRESS_TO_R1_FQ( + input_sample_type.two_fastq_gz_spring.map { meta, files -> + [meta, files[0]] + }, + true, + ) + r2_fastq_gz_from_spring = SPRING_DECOMPRESS_TO_R2_FQ( + input_sample_type.two_fastq_gz_spring.map { meta, files -> + [meta, files[1]] + }, + true, + ) + + versions = versions.mix(SPRING_DECOMPRESS_TO_R1_FQ.out.versions) + versions = versions.mix(SPRING_DECOMPRESS_TO_R2_FQ.out.versions) + versions = versions.mix(SPRING_DECOMPRESS_TO_FQ_PAIR.out.versions) + + two_fastq_gz_from_spring = r1_fastq_gz_from_spring.fastq.join(r2_fastq_gz_from_spring.fastq).map { meta, 
fastq_1, fastq_2 -> [meta, [fastq_1, fastq_2]] } + + two_fastq_gz_from_spring = two_fastq_gz_from_spring.map { meta, files -> addReadgroupToMeta(meta, files) } + + // Convert any bam input to fastq + // fasta are not needed when converting bam to fastq -> [ id:"fasta" ], [] + // No need for fasta.fai -> [] + // Currently don't allow interleaved input + interleave_input = false + CONVERT_FASTQ_INPUT( + input_sample_type.bam, + [[id: "fasta"], []], + [[id: 'null'], []], + interleave_input, + ) + + versions = versions.mix(CONVERT_FASTQ_INPUT.out.versions) + + // Gather fastq (inputed or converted) + // Theorically this could work on mixed input (fastq for one sample and bam for another) + // But not sure how to handle that with the samplesheet + // Or if we really want users to be able to do that + input_fastq = fastq_gz.mix(CONVERT_FASTQ_INPUT.out.reads).mix(one_fastq_gz_from_spring).mix(two_fastq_gz_from_spring) + + // QC + // `--skip_tools fastqc` to skip fastqc + if (!(skip_tools.split(',').contains('fastqc'))) { + FASTQC(input_fastq) + + reports = reports.mix(FASTQC.out.zip.collect { _meta, logs -> logs }) + versions = versions.mix(FASTQC.out.versions) + } + } + else { + input_fastq = channel.empty().mix(input_sample) + } + + if (step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate']) { + + if (aligner == 'parabricks') { + // PREPROCESSING WITH PARABRICKS + FASTQ_PREPROCESS_PARABRICKS( + input_fastq, + fasta, + fasta_fai, + index_alignment, + intervals_bed_combined, + known_sites_indels, + channel.value("cram"), + params.save_mapped, + params.save_output_as_bam, + params.outdir, + ) + + // Gather preprocessing output + cram_variant_calling = channel.empty() + cram_variant_calling = cram_variant_calling.mix(FASTQ_PREPROCESS_PARABRICKS.out.cram) + + // Gather used softwares versions + reports = reports.mix(FASTQ_PREPROCESS_PARABRICKS.out.reports) + versions = versions.mix(FASTQ_PREPROCESS_PARABRICKS.out.versions) + } + else { + // 
PREPROCESSING + FASTQ_PREPROCESS_GATK( + input_fastq, + input_sample, + dict, + fasta, + fasta_fai, + index_alignment, + intervals_and_num_intervals, + intervals_for_preprocessing, + known_sites_indels, + known_sites_indels_tbi, + bbsplit_index, + ) + + // Gather preprocessing output + cram_variant_calling = channel.empty() + cram_variant_calling = cram_variant_calling.mix(FASTQ_PREPROCESS_GATK.out.cram_variant_calling) + + // Gather used softwares versions + reports = reports.mix(FASTQ_PREPROCESS_GATK.out.reports) + versions = versions.mix(FASTQ_PREPROCESS_GATK.out.versions) + } + } + + if (step == 'variant_calling') { + + cram_variant_calling = channel.empty().mix(input_sample) + } + + if (step == 'annotate') { + + cram_variant_calling = channel.empty() + } + + // RUN CRAM QC on the recalibrated CRAM files or when starting from step variant calling. NGSCheckmate should be run also on non-recalibrated CRAM files + CRAM_SAMPLEQC( + cram_variant_calling, + ngscheckmate_bed, + fasta, + skip_tools.split(',').contains('baserecalibrator'), + intervals_for_preprocessing, + ) + + reports = reports.mix(CRAM_SAMPLEQC.out.reports) + versions = versions.mix(CRAM_SAMPLEQC.out.versions) + + if (tools) { + + bam_variant_calling = channel.empty() + + // For cnvkit, msisensor2 and muse we need to use bam input and not cram + if (tools.split(',').contains('cnvkit') || tools.split(',').contains('msisensor2') || tools.split(',').contains('muse')) { + + // Differentiate between bam and cram files + cram_variant_calling_status_tmp = cram_variant_calling.branch { meta, file, index -> + bam: file.toString().endsWith('.bam') + cram: file.toString().endsWith('.cram') + } + + // convert cram files + CRAM_TO_BAM(cram_variant_calling_status_tmp.cram, fasta, fasta_fai) + + // gather all bam files + bam_variant_calling = CRAM_TO_BAM.out.bam + .join(CRAM_TO_BAM.out.bai, by: [0]) + .mix(cram_variant_calling_status_tmp.bam) + .map { meta, bam, bai -> + [meta + [data_type: 'bam'], bam, bai] + } + + 
versions = versions.mix(CRAM_TO_BAM.out.versions) + } + + // Logic to separate germline samples, tumor samples with no matched normal, and combine tumor-normal pairs + cram_variant_calling_status = cram_variant_calling.branch { meta, file, index -> + normal: meta.status == 0 + tumor: meta.status == 1 + } + + // Follow the same logic with bam as we have with cram + bam_variant_calling_status = bam_variant_calling.branch { meta, file, index -> + normal: meta.status == 0 + tumor: meta.status == 1 + } + + // All Germline samples + cram_variant_calling_normal_to_cross = cram_variant_calling_status.normal.map { meta, cram, crai -> [meta.patient, meta, cram, crai] } + bam_variant_calling_normal_to_cross = bam_variant_calling_status.normal.map { meta, bam, bai -> [meta.patient, meta, bam, bai] } + + // All tumor samples + cram_variant_calling_pair_to_cross = cram_variant_calling_status.tumor.map { meta, cram, crai -> [meta.patient, meta, cram, crai] } + bam_variant_calling_pair_to_cross = bam_variant_calling_status.tumor.map { meta, bam, bai -> [meta.patient, meta, bam, bai] } + + // Tumor only samples + // 1. Group together all tumor samples by patient ID [ patient1, [ meta1, meta2 ], [ cram1, crai1, cram2, crai2 ] ] + + // Downside: this only works by waiting for all tumor samples to finish preprocessing, since no group size is provided + cram_variant_calling_tumor_grouped = cram_variant_calling_pair_to_cross.groupTuple() + bam_variant_calling_tumor_grouped = bam_variant_calling_pair_to_cross.groupTuple() + + // 2. Join with normal samples, in each channel there is one key per patient now. 
Patients without matched normal end up with: [ patient1, [ meta1, meta2 ], [ cram1, crai1, cram2, crai2 ], null ] + cram_variant_calling_tumor_joined = cram_variant_calling_tumor_grouped.join(cram_variant_calling_normal_to_cross, failOnDuplicate: true, remainder: true) + bam_variant_calling_tumor_joined = bam_variant_calling_tumor_grouped.join(bam_variant_calling_normal_to_cross, failOnDuplicate: true, remainder: true) + + // 3. Filter out entries with last entry null + cram_variant_calling_tumor_filtered = cram_variant_calling_tumor_joined.filter { it -> !(it.last()) } + bam_variant_calling_tumor_filtered = bam_variant_calling_tumor_joined.filter { it -> !(it.last()) } + + // 4. Transpose [ patient1, [ meta1, meta2 ], [ cram1, crai1, cram2, crai2 ] ] back to [ patient1, meta1, [ cram1, crai1 ], null ] [ patient1, meta2, [ cram2, crai2 ], null ] + // and remove patient ID field & null value for further processing [ meta1, [ cram1, crai1 ] ] [ meta2, [ cram2, crai2 ] ] + cram_variant_calling_tumor_only = cram_variant_calling_tumor_filtered.transpose().map { it -> [it[1], it[2], it[3]] } + bam_variant_calling_tumor_only = bam_variant_calling_tumor_filtered.transpose().map { it -> [it[1], it[2], it[3]] } + + if (params.only_paired_variant_calling) { + // Normal only samples + + // 1. Join with tumor samples, in each channel there is one key per patient now. Patients without matched tumor end up with: [ patient1, [ meta1 ], [ cram1, crai1 ], null ] as there is only one matched normal possible + cram_variant_calling_normal_joined = cram_variant_calling_normal_to_cross.join(cram_variant_calling_tumor_grouped, failOnDuplicate: true, remainder: true) + bam_variant_calling_normal_joined = bam_variant_calling_normal_to_cross.join(bam_variant_calling_tumor_grouped, failOnDuplicate: true, remainder: true) + + // 2. 
Filter out entries with last entry null + cram_variant_calling_normal_filtered = cram_variant_calling_normal_joined.filter { it -> !(it.last()) } + bam_variant_calling_normal_filtered = bam_variant_calling_normal_joined.filter { it -> !(it.last()) } + + // 3. Remove patient ID field & null value for further processing [ meta1, [ cram1, crai1 ] ] [ meta2, [ cram2, crai2 ] ] (no transposing needed since only one normal per patient ID) + cram_variant_calling_status_normal = cram_variant_calling_normal_filtered.map { it -> [it[1], it[2], it[3]] } + bam_variant_calling_status_normal = bam_variant_calling_normal_filtered.map { it -> [it[1], it[2], it[3]] } + } + else { + cram_variant_calling_status_normal = cram_variant_calling_status.normal + bam_variant_calling_status_normal = bam_variant_calling_status.normal + } + + // Tumor - normal pairs + // Use cross to combine normal with all tumor samples, i.e. multi tumor samples from recurrences + cram_variant_calling_pair = cram_variant_calling_normal_to_cross + .cross(cram_variant_calling_pair_to_cross) + .map { normal, tumor -> + def meta = [:] + + meta.id = "${tumor[1].sample}_vs_${normal[1].sample}".toString() + meta.normal_id = normal[1].sample + meta.patient = normal[0] + meta.sex = normal[1].sex + meta.tumor_id = tumor[1].sample + meta.contamination = tumor[1].contamination + + [meta, normal[2], normal[3], tumor[2], tumor[3]] + } + bam_variant_calling_pair = bam_variant_calling_normal_to_cross + .cross(bam_variant_calling_pair_to_cross) + .map { normal, tumor -> + def meta = [:] + + meta.id = "${tumor[1].sample}_vs_${normal[1].sample}".toString() + meta.normal_id = normal[1].sample + meta.patient = normal[0] + meta.sex = normal[1].sex + meta.tumor_id = tumor[1].sample + + [meta, normal[2], normal[3], tumor[2], tumor[3]] + } + + // GERMLINE VARIANT CALLING + // No bwa index for TIDDIT + // intervals handling + // intervals_bed_combined: [] if no_intervals, else interval_bed_combined + // intervals_bed_gz_tbi_combined, 
[] if no_intervals, else interval_bed_combined_gz_tbi + // intervals_bed_combined_for_variant_calling, no_intervals.bed if no intervals, else interval_bed_combined.bed + // skip_tools.split(',').contains('haplotypecaller_filter') is true if filtering should be skipped + BAM_VARIANT_CALLING_GERMLINE_ALL( + tools, + skip_tools, + bam_variant_calling_status_normal, + cram_variant_calling_status_normal, + [[id: 'bwa'], []], + cnvkit_reference, + dbsnp, + dbsnp_tbi, + dbsnp_vqsr, + dict, + fasta, + fasta_fai, + intervals_and_num_intervals, + intervals_bed_combined, + intervals_bed_gz_tbi_combined, + intervals_bed_combined_for_variant_calling, + intervals_bed_gz_tbi_and_num_intervals, + known_indels_vqsr, + known_sites_indels, + known_sites_indels_tbi, + known_sites_snps, + known_sites_snps_tbi, + known_snps_vqsr, + params.joint_germline, + skip_tools.split(',').contains('haplotypecaller_filter'), + params.sentieon_haplotyper_emit_mode, + params.sentieon_dnascope_emit_mode, + params.sentieon_dnascope_pcr_indel_model, + sentieon_dnascope_model, + ) + + // TUMOR ONLY VARIANT CALLING + // No bwa index for TIDDIT + // intervals handling + // intervals_bed_combined: [] if no_intervals, else interval_bed_combined + // intervals_bed_gz_tbi_combined, [] if no_intervals, else interval_bed_combined_gz_tbi + BAM_VARIANT_CALLING_TUMOR_ONLY_ALL( + tools, + bam_variant_calling_tumor_only, + cram_variant_calling_tumor_only, + [[id: 'bwa'], []], + cf_chrom_len, + chr_files, + cnvkit_reference, + dbsnp, + dbsnp_tbi, + dict, + fasta, + fasta_fai, + germline_resource, + germline_resource_tbi, + intervals_and_num_intervals, + intervals_bed_gz_tbi_and_num_intervals, + intervals_bed_combined, + intervals_bed_gz_tbi_combined, + mappability, + msisensor2_models, + pon, + pon_tbi, + params.joint_mutect2, + params.wes, + ) + + // PAIR VARIANT CALLING + // No bwa index for TIDDIT + // intervals handling + // intervals_bed_combined: [] if no_intervals, else interval_bed_combined + // 
intervals_bed_gz_tbi_combined, [] if no_intervals, else interval_bed_combined_gz_tbi + BAM_VARIANT_CALLING_SOMATIC_ALL( + tools, + bam_variant_calling_pair, + cram_variant_calling_pair, + [[id: 'bwa'], []], + cf_chrom_len, + chr_files, + dbsnp, + dbsnp_tbi, + dict, + fasta, + fasta_fai, + germline_resource, + germline_resource_tbi, + intervals_and_num_intervals, + intervals_bed_gz_tbi_and_num_intervals, + intervals_bed_combined, + intervals_bed_gz_tbi_combined, + mappability, + msisensorpro_scan, + pon, + pon_tbi, + ascat_alleles, + ascat_loci, + ascat_loci_gc, + ascat_loci_rt, + params.joint_mutect2, + params.wes, + ) + + // QC on raw variant calls + VCF_QC_BCFTOOLS_VCFTOOLS( + BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.vcf_all).mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.vcf_all), + intervals_bed_combined, + ) + + reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.bcftools_stats.collect { _meta, stats -> [stats] }) + reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_tstv_counts.collect { _meta, counts -> [counts] }) + reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_tstv_qual.collect { _meta, qual -> [qual] }) + reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_filter_summary.collect { _meta, summary -> [summary] }) + reports = reports.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.out_indexcov.collect { _meta, indexcov -> indexcov.flatten() }) + reports = reports.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.out_indexcov.collect { _meta, indexcov -> indexcov.flatten() }) + + // POST VARIANTCALLING + POST_VARIANTCALLING( + tools, + cram_variant_calling_status_normal, + BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all, + BAM_VARIANT_CALLING_GERMLINE_ALL.out.tbi_all, + cram_variant_calling_tumor_only, + BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.vcf_all, + BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.tbi_all, + cram_variant_calling_pair, + BAM_VARIANT_CALLING_SOMATIC_ALL.out.vcf_all, + 
BAM_VARIANT_CALLING_SOMATIC_ALL.out.tbi_all, + fasta, + fasta_fai, + params.concatenate_vcfs, + params.filter_vcfs, + params.snv_consensus_calling, + params.normalize_vcfs, + params.varlociraptor_chunk_size, + varlociraptor_scenario_germline, + varlociraptor_scenario_somatic, + varlociraptor_scenario_tumor_only, + ) + + // Gather vcf files for annotation and QC + // POST_VARIANTCALLING always outputs VCFs - either processed or pass-through originals + vcf_to_annotate = POST_VARIANTCALLING.out.vcfs + + CHANNEL_VARIANT_CALLING_CREATE_CSV(vcf_to_annotate, params.outdir) + + // Gather used variant calling softwares versions + versions = versions.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.versions) + versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.versions) + versions = versions.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.versions) + versions = versions.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.versions) + versions = versions.mix(POST_VARIANTCALLING.out.versions) + + // ANNOTATE + if (step == 'annotate') { + vcf_to_annotate = input_sample + } + + if (tools.split(',').contains('merge') || tools.split(',').contains('snpeff') || tools.split(',').contains('vep') || tools.split(',').contains('bcfann') || tools.split(',').contains('snpsift')) { + + vep_fasta = params.vep_include_fasta ? 
fasta : [[id: 'null'], []] + + VCF_ANNOTATE_ALL( + vcf_to_annotate.map { meta, vcf -> [meta + [file_name: vcf.baseName], vcf] }, + vep_fasta, + tools, + snpeff_db, + snpeff_cache, + vep_genome, + vep_species, + vep_cache_version, + vep_cache, + vep_extra_files, + bcftools_annotations, + bcftools_annotations_tbi, + bcftools_columns, + bcftools_header_lines, + snpsift_db, + ) + + // Gather used softwares versions + versions = versions.mix(VCF_ANNOTATE_ALL.out.versions) + } + } + + // + // Collate and save software versions + // + def version_yaml = channel.empty() + if (!(skip_tools.split(',').contains('versions'))) { + version_yaml = softwareVersionsToYAML( + softwareVersions: versions.mix(channel.topic("versions")), + nextflowVersion: workflow.nextflow.version, + ).collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_' + 'sarek_software_' + 'mqc_' + 'versions.yml', + sort: true, + newLine: true, + ) + } + + // + // MODULE: MultiQC + // + if (!(skip_tools.split(',').contains('multiqc'))) { + ch_multiqc_config = channel.fromPath("${projectDir}/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? channel.fromPath(params.multiqc_config, checkIfExists: true) : channel.empty() + ch_multiqc_logo = params.multiqc_logo ? channel.fromPath(params.multiqc_logo, checkIfExists: true) : channel.empty() + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true)
+        ch_methods_description = channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description))
+
+        ch_multiqc_files = ch_multiqc_files.mix(version_yaml)
+        ch_multiqc_files = ch_multiqc_files.mix(reports)
+        ch_multiqc_files = ch_multiqc_files.mix(channel.topic("multiqc_files").map { _meta, _process, _tool, report -> report })
+
+        ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: true))
+
+        MULTIQC(
+            ch_multiqc_files.collect(),
+            ch_multiqc_config.toList(),
+            ch_multiqc_custom_config.toList(),
+            ch_multiqc_logo.toList(),
+            [],
+            [],
+        )
+        multiqc_publish = MULTIQC.out.data.mix(MULTIQC.out.plots, MULTIQC.out.report)
+        multiqc_report = MULTIQC.out.report.toList()
+    }
+
+    emit:
+    multiqc_report // channel: /path/to/multiqc_report.html
+    multiqc_publish
+    versions // channel: [ path(versions.yml) ]
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    FUNCTIONS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// Add readgroup to meta and remove lane
+//
+// meta  : sample map; meta.id, meta.sample, meta.lane and meta.patient are read here
+// files : FASTQ files of the sample; files[0] and files[1] are inspected for a flowcell ID
+//
+// Returns [meta, files], where the returned meta no longer has the `lane` key and
+// has gained `read_group` and `sample_lane_id` (both plain Strings).
+def addReadgroupToMeta(meta, files) {
+    // Optional sequencing-centre field; it carries its own trailing escaped tab so it
+    // can be spliced into the read group string below, or vanish when unset
+    def CN = params.seq_center ? "CN:${params.seq_center}\\t" : ''
+    def flowcell = flowcellLaneFromFastq(files[0])
+
+    // Check if flowcell ID matches
+    // (the second file is only parsed when the first one yielded an ID)
+    if (flowcell && flowcell != flowcellLaneFromFastq(files[1])) {
+        error("Flowcell ID does not match for paired reads of sample ${meta.id} - ${files}")
+    }
+
+    // If we cannot read the flowcell ID from the fastq file, then we don't use it
+    def sample_lane_id = flowcell ? "${flowcell}.${meta.sample}.${meta.lane}" : "${meta.sample}.${meta.lane}"
+
+    // Don't use a random element for ID, it breaks resuming
+    // With params.umi_read_structure set, ID is the sample and PU is the fixed word
+    // "consensus"; otherwise ID is sample_lane_id and PU is the lane.
+    // The value is wrapped in literal double quotes and uses the two-character
+    // sequence \t between fields.
+    def read_group = params.umi_read_structure
+        ? "\"@RG\\tID:${meta.sample}\\t${CN}PU:consensus\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\""
+        : "\"@RG\\tID:${sample_lane_id}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\""
+
+    // Drop `lane`, then add the two derived entries
+    meta = meta - meta.subMap('lane') + [read_group: read_group.toString(), sample_lane_id: sample_lane_id.toString()]
+    return [meta, files]
+}
+
+// Parse first line of a FASTQ file, return the flowcell ID.
+// (Despite the function name, no lane number is extracted or returned.)
+//
+// path : FASTQ file handed to readFirstLineOfFastq
+//
+// Returns the flowcell ID, or null when the first line is unavailable or does not
+// have a recognised number of ':'-separated fields.
+def flowcellLaneFromFastq(path) {
+    // First line of FASTQ file contains sequence identifier plus optional description
+    def firstLine = readFirstLineOfFastq(path)
+    def flowcell_id = null
+
+    // Expected format from ILLUMINA
+    // cf https://en.wikipedia.org/wiki/FASTQ_format#Illumina_sequence_identifiers
+    // Five fields:
+    // @<instrument>:<lane>:<tile>:<x-pos>:<y-pos>...
+    // Seven fields or more (from CASAVA 1.8+):
+    // "@<instrument>:<run number>:<flowcell ID>:<lane>:<tile>:<x-pos>:<y-pos>..."
+
+    // A missing or empty first line yields zero fields, which falls through every
+    // branch below without a warning
+    def fields = firstLine ? firstLine.split(':') : []
+    if (fields.size() == 5) {
+        // Get the instrument name as flowcell ID
+        // substring(1) strips the leading '@' of the identifier
+        flowcell_id = fields[0].substring(1)
+    }
+    else if (fields.size() >= 7) {
+        // Get the actual flowcell ID
+        flowcell_id = fields[2]
+    }
+    else if (fields.size() != 0) {
+        log.warn("FASTQ file(${path}): Cannot extract flowcell ID from ${firstLine}")
+    }
+    return flowcell_id
+}
+
+// Get first line of a FASTQ file
+//
+// path : FASTQ file; its content is read through a GZIP decompressor and decoded as ASCII
+//
+// Returns the first line, or null when an Exception occurs while streaming
+// (two warnings are logged in that case).
+def readFirstLineOfFastq(path) {
+    def line = null
+    try {
+        path.withInputStream { stream ->
+            // withInputStream only closes the raw stream; close the decompressing reader
+            // chain explicitly so the GZIP stream is released as soon as the line is read
+            new BufferedReader(new InputStreamReader(new java.util.zip.GZIPInputStream(stream), 'ASCII')).withCloseable { reader ->
+                line = reader.readLine()
+                // NOTE(review): a failed assert raises an AssertionError, which the
+                // catch (Exception) below does not handle, so a first line that does not
+                // start with '@' appears to abort rather than warn - confirm this is intended
+                assert line.startsWith('@')
+            }
+        }
+    }
+    catch (Exception e) {
+        log.warn("FASTQ file(${path}): Error streaming")
+        log.warn("${e.message}")
+    }
+    return line
+}