diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index b290e09..97c8c97 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,20 +1,20 @@ { "name": "nfcore", - "image": "nfcore/gitpod:latest", - "remoteUser": "gitpod", - "runArgs": ["--privileged"], + "image": "nfcore/devcontainer:latest", - // Configure tool-specific properties. - "customizations": { - // Configure properties specific to VS Code. - "vscode": { - // Set *default* container specific settings.json values on container create. - "settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python" - }, + "remoteUser": "root", + "privileged": true, - // Add the IDs of extensions you want installed when the container is created. - "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] - } + "remoteEnv": { + // Workspace path on the host for mounting with docker-outside-of-docker + "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" + }, + + "onCreateCommand": "./.devcontainer/setup.sh", + + "hostRequirements": { + "cpus": 4, + "memory": "16gb", + "storage": "32gb" } } diff --git a/.devcontainer/setup.sh b/.devcontainer/setup.sh new file mode 100755 index 0000000..5b9444b --- /dev/null +++ b/.devcontainer/setup.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# Customise the terminal command prompt +echo "export PROMPT_DIRTRIM=2" >> $HOME/.bashrc +echo "export PS1='\[\e[3;36m\]\w ->\[\e[0m\\] '" >> $HOME/.bashrc +export PROMPT_DIRTRIM=2 +export PS1='\[\e[3;36m\]\w ->\[\e[0m\\] ' + +# Update Nextflow +nextflow self-update + +# Update welcome message +echo "Welcome to the nf-core/createpanelrefs devcontainer!" 
> /usr/local/etc/vscode-dev-containers/first-run-notice.txt diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml index 243e782..3b9724c 100644 --- a/.github/actions/nf-test/action.yml +++ b/.github/actions/nf-test/action.yml @@ -25,9 +25,9 @@ runs: version: "${{ env.NXF_VERSION }}" - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" - name: Install nf-test uses: nf-core/setup-nf-test@v1 @@ -52,15 +52,13 @@ runs: with: auto-update-conda: true conda-solver: libmamba + channels: conda-forge + channel-priority: strict conda-remove-defaults: true - # TODO Skip failing conda tests and document their failures - # https://github.com/nf-core/modules/issues/7017 - name: Run nf-test shell: bash env: - NFT_DIFF: ${{ env.NFT_DIFF }} - NFT_DIFF_ARGS: ${{ env.NFT_DIFF_ARGS }} NFT_WORKDIR: ${{ env.NFT_WORKDIR }} run: | nf-test test \ diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 16879f1..213c297 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -24,25 +24,22 @@ jobs: - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 - # TODO nf-core: You can customise AWS full pipeline tests as required - # Add full size test data (but still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters with: - workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + workspace_id: ${{ vars.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + compute_env: ${{ vars.TOWER_COMPUTE_ENV }} revision: ${{ steps.revision.outputs.revision }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/createpanelrefs/work-${{ steps.revision.outputs.revision }} + workdir: s3://${{ 
vars.AWS_S3_BUCKET }}/work/createpanelrefs/work-${{ steps.revision.outputs.revision }} parameters: | { "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/createpanelrefs/results-${{ steps.revision.outputs.revision }}" + "outdir": "s3://${{ vars.AWS_S3_BUCKET }}/createpanelrefs/results-${{ steps.revision.outputs.revision }}" } profiles: test_full - - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: Seqera Platform debug log file path: | - seqera_platform_action_*.log - seqera_platform_action_*.json + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 7c2c028..b3e2d70 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -14,20 +14,20 @@ jobs: - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 with: - workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + workspace_id: ${{ vars.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + compute_env: ${{ vars.TOWER_COMPUTE_ENV }} revision: ${{ github.sha }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/createpanelrefs/work-${{ github.sha }} + workdir: s3://${{ vars.AWS_S3_BUCKET }}/work/createpanelrefs/work-${{ github.sha }} parameters: | { - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/createpanelrefs/results-test-${{ github.sha }}" + "outdir": "s3://${{ vars.AWS_S3_BUCKET }}/createpanelrefs/results-test-${{ github.sha }}" } profiles: test - - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: Seqera Platform debug log file path: | - seqera_platform_action_*.log - seqera_platform_action_*.json + tower_action_*.log + tower_action_*.json diff 
--git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index ac030fd..6adb0ff 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9 + - uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 999bcc3..45884ff 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -44,9 +44,9 @@ jobs: - name: Disk space cleanup uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" architecture: "x64" - name: Setup Apptainer @@ -57,7 +57,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install git+https://github.com/nf-core/tools.git@dev + pip install git+https://github.com/nf-core/tools.git - name: Make a cache directory for the container images run: | @@ -127,7 +127,7 @@ jobs: fi - name: Upload Nextflow logfile for debugging purposes - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: nextflow_logfile.txt path: .nextflow.log* diff --git a/.github/workflows/fix_linting.yml 
b/.github/workflows/fix_linting.yml index da9b1a4..96fc86e 100644 --- a/.github/workflows/fix_linting.yml +++ b/.github/workflows/fix_linting.yml @@ -13,13 +13,13 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: token: ${{ secrets.nf_core_bot_auth_token }} # indication that the linting is being fixed - name: React on comment - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: comment-id: ${{ github.event.comment.id }} reactions: eyes @@ -32,9 +32,9 @@ jobs: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} # Install and run pre-commit - - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" - name: Install pre-commit run: pip install pre-commit @@ -47,7 +47,7 @@ jobs: # indication that the linting has finished - name: react if linting finished succesfully if: steps.pre-commit.outcome == 'success' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: comment-id: ${{ github.event.comment.id }} reactions: "+1" @@ -67,21 +67,21 @@ jobs: - name: react if linting errors were fixed id: react-if-fixed if: steps.commit-and-push.outcome == 'success' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: comment-id: ${{ github.event.comment.id }} reactions: hooray - name: react if linting errors were not fixed if: 
steps.commit-and-push.outcome == 'failure' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: comment-id: ${{ github.event.comment.id }} reactions: confused - name: react if linting errors were not fixed if: steps.commit-and-push.outcome == 'failure' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: issue-number: ${{ github.event.issue.number }} body: | diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index f2d7d1d..7a527a3 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,12 +11,12 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - - name: Set up Python 3.12 - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - name: Set up Python 3.14 + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" - name: Install pre-commit run: pip install pre-commit @@ -28,14 +28,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - name: Install Nextflow uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" architecture: "x64" - name: read .nf-core.yml @@ -71,7 +71,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: 
actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 7e8050f..e6e9bc2 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@4c1e823582f43b179e2cbb49c3eade4e41f992e2 # v10 + uses: dawidd6/action-download-artifact@ac66b43f0e6a346234dd65d4d0c8fbb31cb316e5 # v11 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2 + uses: marocchino/sticky-pull-request-comment@773744901bac0e8cbb5a0dc842800d45e9b2b405 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index f03aea0..c98d76e 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -1,12 +1,5 @@ name: Run nf-test on: - push: - paths-ignore: - - "docs/**" - - "**/meta.yml" - - "**/*.md" - - "**/*.png" - - "**/*.svg" pull_request: paths-ignore: - "docs/**" @@ -25,7 +18,7 @@ concurrency: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - NFT_VER: "0.9.2" + NFT_VER: "0.9.3" NFT_WORKDIR: "~" NXF_ANSI_LOG: false NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity @@ -35,7 +28,7 @@ jobs: nf-test-changes: name: nf-test-changes runs-on: # use self-hosted runners - - runs-on=$-nf-test-changes + - runs-on=${{ github.run_id }}-nf-test-changes - runner=4cpu-linux-x64 outputs: shard: ${{ steps.set-shards.outputs.shard }} @@ -47,7 +40,7 @@ jobs: rm -rf ./* || true rm -rf ./.??* 
|| true ls -la ./ - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: fetch-depth: 0 @@ -69,7 +62,7 @@ jobs: needs: [nf-test-changes] if: ${{ needs.nf-test-changes.outputs.total_shards != '0' }} runs-on: # use self-hosted runners - - runs-on=$-nf-test + - runs-on=${{ github.run_id }}-nf-test - runner=4cpu-linux-x64 strategy: fail-fast: false @@ -85,35 +78,52 @@ jobs: - isMain: false profile: "singularity" NXF_VER: - - "24.04.2" + - "25.04.0" - "latest-everything" env: NXF_ANSI_LOG: false TOTAL_SHARDS: ${{ needs.nf-test-changes.outputs.total_shards }} steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: fetch-depth: 0 - name: Run nf-test + id: run_nf_test uses: ./.github/actions/nf-test + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} env: - NFT_DIFF: ${{ env.NFT_DIFF }} - NFT_DIFF_ARGS: ${{ env.NFT_DIFF_ARGS }} NFT_WORKDIR: ${{ env.NFT_WORKDIR }} + NXF_VERSION: ${{ matrix.NXF_VER }} with: profile: ${{ matrix.profile }} shard: ${{ matrix.shard }} total_shards: ${{ env.TOTAL_SHARDS }} + + - name: Report test status + if: ${{ always() }} + run: | + if [[ "${{ steps.run_nf_test.outcome }}" == "failure" ]]; then + echo "::error::Test with ${{ matrix.NXF_VER }} failed" + # Add to workflow summary + echo "## ❌ Test failed: ${{ matrix.profile }} | ${{ matrix.NXF_VER }} | Shard ${{ matrix.shard }}/${{ env.TOTAL_SHARDS }}" >> $GITHUB_STEP_SUMMARY + if [[ "${{ matrix.NXF_VER }}" == "latest-everything" ]]; then + echo "::warning::Test with latest-everything failed but will not cause workflow failure. Please check if the error is expected or if it needs fixing." 
+ fi + if [[ "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then + exit 1 + fi + fi + confirm-pass: needs: [nf-test] if: always() runs-on: # use self-hosted runners - - runs-on=$-confirm-pass + - runs-on=${{ github.run_id }}-confirm-pass - runner=2cpu-linux-x64 steps: - - name: One or more tests failed + - name: One or more tests failed (excluding latest-everything) if: ${{ contains(needs.*.result, 'failure') }} run: exit 1 @@ -132,11 +142,3 @@ jobs: echo "DEBUG: toJSON(needs) = ${{ toJSON(needs) }}" echo "DEBUG: toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" echo "::endgroup::" - - - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner - if: always() - run: | - ls -la ./ - rm -rf ./* || true - rm -rf ./.??* || true - ls -la ./ diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index 4abaf48..431d3d4 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -14,6 +14,10 @@ jobs: run: | echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" | sed 's/-//g' >> $GITHUB_OUTPUT + - name: get description + id: get_description + run: | + echo "description=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .description')" >> $GITHUB_OUTPUT - uses: rzr/fediverse-action@master with: access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} @@ -22,7 +26,7 @@ jobs: # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release message: | Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
- + ${{ steps.get_description.outputs.description }} Please see the changelog: ${{ github.event.release.html_url }} ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics @@ -30,7 +34,7 @@ jobs: bsky-post: runs-on: ubuntu-latest steps: - - uses: zentered/bluesky-post-action@4aa83560bb3eac05dbad1e5f221ee339118abdd2 # v0.2.0 + - uses: zentered/bluesky-post-action@6461056ea355ea43b977e149f7bf76aaa572e5e8 # v0.3.0 with: post: | Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! diff --git a/.github/workflows/template-version-comment.yml b/.github/workflows/template-version-comment.yml index beb5c77..dea5dda 100644 --- a/.github/workflows/template-version-comment.yml +++ b/.github/workflows/template-version-comment.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: ref: ${{ github.event.pull_request.head.sha }} @@ -23,7 +23,6 @@ jobs: run: | python -m pip install --upgrade pip pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} - - name: Check nf-core outdated id: nf_core_outdated run: echo "OUTPUT=$(pip list --outdated | grep nf-core)" >> ${GITHUB_ENV} diff --git a/.gitignore b/.gitignore index a42ce01..9e30720 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ testing/ testing* *.pyc null/ +.nf-test +.nf-test.log diff --git a/.gitpod.yml b/.gitpod.yml deleted file mode 100644 index 83599f6..0000000 --- a/.gitpod.yml +++ /dev/null @@ -1,10 +0,0 @@ -image: nfcore/gitpod:latest -tasks: - - name: Update Nextflow and setup pre-commit - command: | - pre-commit install --install-hooks - nextflow self-update - -vscode: - extensions: - - nf-core.nf-core-extensionpack # https://github.com/nf-core/vscode-extensionpack diff --git a/.nf-core.yml b/.nf-core.yml index 
28e1a22..8dff0d0 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,12 +1,8 @@ lint: files_exist: - - .github/workflows/ci.yml - conf/modules.config - files_unchanged: - - .gitattributes - - .gitignore modules_config: false -nf_core_version: 3.3.1 +nf_core_version: 3.5.1 repository_type: pipeline template: author: "@maxulysse" @@ -17,4 +13,4 @@ template: name: createpanelrefs org: nf-core outdir: . - version: 1.0dev + version: 1.0.0 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9d0b248..d06777a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,9 +4,9 @@ repos: hooks: - id: prettier additional_dependencies: - - prettier@3.5.0 + - prettier@3.6.2 - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: trailing-whitespace args: [--markdown-linebreak-ext=md] diff --git a/.prettierignore b/.prettierignore index edd29f0..dd749d4 100644 --- a/.prettierignore +++ b/.prettierignore @@ -10,4 +10,7 @@ testing/ testing* *.pyc bin/ +.nf-test/ ro-crate-metadata.json +modules/nf-core/ +subworkflows/nf-core/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 92e5780..1a2ffd6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,14 +3,68 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0dev - [date] +## [1.0.0](https://github.com/nf-core/createpanelrefs/releases/tag/1.0.0) - Hell's Gate +Hell's Gate National Park is a national park situated near Lake Naivasha in Kenya. Initial release of nf-core/createpanelrefs, created with the [nf-core](https://nf-co.re/) template. 
### `Added` +- [#5](https://github.com/nf-core/createpanelrefs/pull/5) - `CNVKIT` can be used to create a PON +- [#5](https://github.com/nf-core/createpanelrefs/pull/5) - Usage of nf-validation +- [#5](https://github.com/nf-core/createpanelrefs/pull/5) - Usage of nf-test +- [#8](https://github.com/nf-core/createpanelrefs/pull/8) - `Mutect2` can be used to create a PON +- [#10](https://github.com/nf-core/createpanelrefs/pull/10) - `GATK germlinecnvcaller` can be used to create a PON +- [#17](https://github.com/nf-core/createpanelrefs/pull/17) - `GENS` can be used to create a PON +- [#50](https://github.com/nf-core/createpanelrefs/pull/50) - Add auto creation of interval_list file from gens, and bed file for mutect2 +- [#62](https://github.com/nf-core/createpanelrefs/pull/62) - Add megatests + +### `Updated` + +- [#19](https://github.com/nf-core/createpanelrefs/pull/19) - Updates germlinecnvcaller subworkflow to handle exome samples +- [#24](https://github.com/nf-core/createpanelrefs/pull/24) - Updates germlinecnvcaller subworkflow to use mappability and segmental duplications track +- [#24](https://github.com/nf-core/createpanelrefs/pull/24) - Updates germlinecnvcaller and gens subworkflows to use custom names for panel of normals. +- [#28](https://github.com/nf-core/createpanelrefs/pull/28) - Updates default args for gens subworkflow and made the parameters available from the command line. +- [#31](https://github.com/nf-core/createpanelrefs/pull/31) - Publish interval_list file from gens subworkflow by default. 
+- [#35](https://github.com/nf-core/createpanelrefs/pull/35) - Template update for nf-core/tools v3.0.2 +- [#35](https://github.com/nf-core/createpanelrefs/pull/35) - Improve pipeline level tests +- [#48](https://github.com/nf-core/createpanelrefs/pull/48) - Improve CI (early failure + automatic nf-test shards + [RunsOn](https://runs-on.com/)) +- [#49](https://github.com/nf-core/createpanelrefs/pull/49) - Improve CI (Test Mutect2 with CRAM + better usage of test references) +- [#49](https://github.com/nf-core/createpanelrefs/pull/49) - Move all parameters in the schema that are references in the references section +- [#50](https://github.com/nf-core/createpanelrefs/pull/50) - Improve references related files handling +- [#50](https://github.com/nf-core/createpanelrefs/pull/50) - Heavy refactoring of the pipeline +- [#52](https://github.com/nf-core/createpanelrefs/pull/52) - Template update for nf-core/tools v3.2.1 +- [#54](https://github.com/nf-core/createpanelrefs/pull/54) - Template update for nf-core/tools v3.3.1 +- [#54](https://github.com/nf-core/createpanelrefs/pull/54) - Update nft-utils to 0.0.4 +- [#55](https://github.com/nf-core/createpanelrefs/pull/55) - Prepare release 1.0.0 +- [#63](https://github.com/nf-core/createpanelrefs/pull/63) - Template update for nf-core/tools v3.5.0dev +- [#66](https://github.com/nf-core/createpanelrefs/pull/66) - Update `GENS` to allow for creating a long-read PON + ### `Fixed` -### `Dependencies` +- [#50](https://github.com/nf-core/createpanelrefs/pull/50) - Fix mutect2 that wasn't working without a bed file +- [#53](https://github.com/nf-core/createpanelrefs/pull/53) - Minor syntax fixes due to [#50](https://github.com/nf-core/createpanelrefs/pull/50) +- [#54](https://github.com/nf-core/createpanelrefs/pull/54) - Fix name for `_mqc_versions.yml` file +- [#56](https://github.com/nf-core/createpanelrefs/pull/56) - Fix gcnv interval list +- [#57](https://github.com/nf-core/createpanelrefs/pull/57) - Improve syntax in 
`assets/schema_input.json` file, from @nvnieuwk in [#46](https://github.com/nf-core/createpanelrefs/pull/46) +- [#57](https://github.com/nf-core/createpanelrefs/pull/57) - Fix missing documentation for GATK Mutect2 and GENS + +### `Dependencies` - modules + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| cnvkit | | 0.9.12 | +| gatk4 | | 4.6.2.0 | +| gawk | | 5.3.0 | +| htslib | | 1.22.1 | +| mosdepth | | 0.3.11 | +| multiqc | | 1.32 | +| samtools | | 1.22.1 | + +### `Dependencies` - Nextflow plugins + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| nf-schema | | 2.5.1 | ### `Deprecated` diff --git a/CITATIONS.md b/CITATIONS.md index 5c670f5..a50d9e5 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,13 +10,17 @@ ## Pipeline tools -- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) +- [CNVKIT](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004873) -> Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. + > Talevich E, Shain AH, Botton T, Bastian BC (2016) CNVkit: Genome-Wide Copy Number Detection and Visualization from Targeted DNA Sequencing. PLoS Comput Biol 12(4): e1004873. doi: 10.1371/journal.pcbi.1004873. PubMed PMID: 27100738. PubMed Central PMCID: PMC4839673. + +- [GATK](https://genome.cshlp.org/content/20/9/1297) + + > McKenna A, Hanna M, Banks E, et al. The Genome Analysis Toolkit: A MapReduce framework for analyzing next-generation DNA sequencing data. Genome Res. 2010;20(9):1297-1303. doi:10.1101/gr.107524.110 - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) -> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. 
+ > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. ## Software packaging/containerisation tools diff --git a/README.md b/README.md index 489dfbb..dd93a3a 100644 --- a/README.md +++ b/README.md @@ -5,12 +5,13 @@ -[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml) +[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/createpanelrefs) +[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml) [![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) -[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.04.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) -[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.1) 
+[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) +[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) @@ -20,47 +21,47 @@ ## Introduction -**nf-core/createpanelrefs** is a bioinformatics pipeline that ... +**nf-core/createpanelrefs** is a bioinformatics helper pipeline that will help in creating panel of normals and other models. - - - -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +2. Build Panel of Normals for [`CNVKIT`](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004873) +3. Build ploidy and cnv calling models for [`GATK's germlinecnvcaller workflow`](https://genome.cshlp.org/content/20/9/1297) +4. Build Panel of Normals for [`GENS`](https://github.com/Clinical-Genomics-Lund/gens) +5. Build Panel of Normals for [`Mutect2`](https://genome.cshlp.org/content/20/9/1297) +6. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) ## Usage > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - +| Tool | Alignment format | +| ----------------- | ---------------------------- | +| cnvkit | bam | +| germlinecnvcaller | bam or cram or a mix of both | Now, you can run the pipeline using: - - ```bash nextflow run nf-core/createpanelrefs \ -profile \ --input samplesheet.csv \ + --tools \ + --genome GATK.GRCh38 \ --outdir ``` @@ -78,10 +79,14 @@ For more details about the output files and reports, please refer to the ## Credits nf-core/createpanelrefs was originally written by @maxulysse. +@marrip contributed in the idea that started it all. +@matthdsm and @FriederikeHanssen contributed in the actual design. +@ramprasadn's interest was the final push that led to the creation. We thank the following people for their extensive assistance in the development of this pipeline: - +- @jfy133 +- @JoseEspinosa ## Contributions and Support @@ -94,8 +99,6 @@ For further information or help, don't hesitate to get in touch on the [Slack `# - - An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. You can cite the `nf-core` publication as follows: diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 84155f8..76fa009 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,8 +3,6 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/createpanelrefs Methods Description" section_href: "https://github.com/nf-core/createpanelrefs" plot_type: "html" -## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline -## You inject any metadata in the Nextflow '${workflow}' object data: |

Methods

Data was processed using nf-core/createpanelrefs v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 767cbd1..a905e53 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/createpanelrefs + This report has been generated by the nf-core/createpanelrefs analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-createpanelrefs-methods-description": order: -1000 diff --git a/assets/schema_input.json b/assets/schema_input.json index e749db3..6bebae0 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/nf-core/createpanelrefs/master/assets/schema_input.json", + "$id": "https://raw.githubusercontent.com/nf-core/createpanelrefs/main/assets/schema_input.json", "title": "nf-core/createpanelrefs pipeline - params.input schema", "description": "Schema for the file provided with params.input", "type": "array", @@ -13,21 +13,36 @@ "errorMessage": "Sample name must be provided and cannot contain spaces", "meta": ["id"] }, - "fastq_1": { + "bam": { + "errorMessage": "BAM file cannot contain spaces, has to exist and must have extension '.bam'", "type": "string", + "pattern": "^\\S+\\.bam$", "format": "file-path", - "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "exists": true }, - "fastq_2": { + "bai": { + "errorMessage": "BAM index file cannot contain spaces and must have extension '.bai'", "type": "string", + "pattern": "^\\S+\\.bai$", "format": "file-path", - "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "exists": true + 
}, + "cram": { + "errorMessage": "CRAM file cannot contain spaces and must have extension '.cram'", + "type": "string", + "pattern": "^\\S+\\.cram$", + "format": "file-path", + "exists": true + }, + "crai": { + "errorMessage": "CRAM index file cannot contain spaces and must have extension '.crai'", + "type": "string", + "pattern": "^\\S+\\.crai$", + "format": "file-path", + "exists": true } }, - "required": ["sample", "fastq_1"] + "required": ["sample"], + "oneOf": [{ "required": ["bam"] }, { "required": ["cram"] }] } } diff --git a/conf/base.config b/conf/base.config index be34b4d..13625ed 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,7 +10,6 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { 1 * task.attempt } memory = { 6.GB * task.attempt } time = { 4.h * task.attempt } @@ -19,13 +18,6 @@ process { maxRetries = 1 maxErrors = '-1' - // Process-specific resource requirements - // NOTE - Please try and reuse the labels below as much as possible. - // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. - // If possible, it would be nice to keep the same label naming convention when - // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. - // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { 1 } memory = { 6.GB * task.attempt } @@ -61,5 +53,6 @@ process { } withLabel: process_gpu { ext.use_gpu = { workflow.profile.contains('gpu') } + accelerator = { workflow.profile.contains('gpu') ? 
1 : null } } } diff --git a/conf/igenomes.config b/conf/igenomes.config index 3f11437..d0f5074 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -11,430 +11,473 @@ params { // illumina iGenomes reference file paths genomes { - 'GRCh37' { - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" + 'Ensembl.AGPv3' { + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + mito_name = "Mt" + star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" } - 'GRCh38' { - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = 
"${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + 'Ensembl.BDGP6' { + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" + macs_gsize = "1.2e8" + mito_name = "M" + star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" } - 'CHM13' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" - bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" - gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" - mito_name = "chrM" + 'Ensembl.CanFam3.1' { + bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" + bismark = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" + mito_name = "MT" + readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" + star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" } - 'GRCm38' { - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" + 'Ensembl.CHIMP2.1.4' { + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + gtf = 
"${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" mito_name = "MT" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" + star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" } - 'TAIR10' { - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" - mito_name = "Mt" + 'Ensembl.EB1' { + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" + readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + star = 
"${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" } - 'EB2' { - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" + 'Ensembl.EB2' { + bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" + star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" } - 'UMD3.1' { - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" - mito_name = "MT" - } - 'WBcel235' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - mito_name = "MtDNA" - macs_gsize = "9e7" - } - 'CanFam3.1' { - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" - mito_name = "MT" - } - 'GRCz10' { - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" - bowtie2 
= "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" + 'Ensembl.EF2' { + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" + macs_gsize = "1.21e7" mito_name = "MT" + readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" + star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" } - 'BDGP6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - mito_name = "M" - macs_gsize = "1.2e8" - } - 'EquCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" + 'Ensembl.EquCab2' { + bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" mito_name = "MT" + readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" + star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" } - 'EB1' { - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" - bismark = 
"${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" - } - 'Galgal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" + 'Ensembl.Galgal4' { + bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" mito_name = "MT" + star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" } - 'Gm01' { - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" + 'Ensembl.Gm01' { + bed12 = 
"${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" + star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" } - 'Mmul_1' { - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" + 'Ensembl.GRCh37' { + bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" + fasta = 
"${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" + macs_gsize = "2.7e9" mito_name = "MT" + readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" + star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" } - 'IRGSP-1.0' { - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" + 'Ensembl.GRCm38' { + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" + dbsnp = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.snps_all.dbSNP142.vcf.gz" + dbsnp_tbi = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.snps_all.dbSNP142.vcf.gz.tbi" + dict = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.dict" + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa.fai" + gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + intervals = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/intervals/GRCm38_calling_list.bed" + known_indels = 
"${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz" + known_indels_tbi = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz.tbi" + macs_gsize = "1.87e9" + mito_name = "MT" + readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" + star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + } + 'Ensembl.GRCz10' { + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + mito_name = "MT" + star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" + } + 'Ensembl.IRGSP-1.0' { + bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" mito_name = "Mt" + star = 
"${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" } - 'CHIMP2.1.4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" + 'Ensembl.Mmul_1' { + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" mito_name = "MT" + readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" + star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" } - 'Rnor_5.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" + 'Ensembl.R64-1-1' { + bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" + macs_gsize = "1.2e7" + mito_name = "MT" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" + } + 'Ensembl.Rnor_5.0' { + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" mito_name = "MT" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" } - 'Rnor_6.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" + 'Ensembl.Rnor_6.0' { + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'R64-1-1' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - mito_name = "MT" - macs_gsize = "1.2e7" - } - 'EF2' { - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = 
"${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" mito_name = "MT" - macs_gsize = "1.21e7" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" } - 'Sbi1' { - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" + 'Ensembl.Sbi1' { + bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" + star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" } - 'Sscrofa10.2' { - fasta = 
"${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" + 'Ensembl.Sscrofa10.2' { + bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" mito_name = "MT" + readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" + star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" } - 'AGPv3' { - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" + 'Ensembl.TAIR10' { + bed12 = 
"${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" mito_name = "Mt" + readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" + star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" } - 'hg38' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + 'Ensembl.UMD3.1' { + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + gtf = 
"${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" + mito_name = "MT" + readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" + star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" } - 'hg19' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" + 'Ensembl.WBcel235' { + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" + macs_gsize = "9e7" + mito_name = "MtDNA" + star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" } - 'mm10' { - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" + 'GATK.GRCh37' { + bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/BWAIndex/" + dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz" + dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz.tbi" + dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict" + fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta" + fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai" + intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/intervals/wgs_calling_regions_Sarek.list" + known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz" + known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz.tbi" + known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz" + known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz.tbi" + } + 'GATK.GRCh38' { + bwa = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAmem2Index/" + dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" + dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" + dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" + dragmap = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/dragmap/" + fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta" + fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai" + intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/intervals/wgs_calling_regions_noseconds.hg38.bed" + known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" + known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" + known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz" + known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz.tbi" + pon = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz" + pon_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi" + } + 'NCBI.GRCh38' { + bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" + bowtie2 = 
"${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" + macs_gsize = "2.7e9" mito_name = "chrM" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" + star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" } - 'bosTau8' { - fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" + 'UCSC.bosTau8' { + bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" mito_name = "chrM" + star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" } - 'ce10' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" + 'UCSC.canFam3' { + bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" + mito_name = "chrM" + readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" + star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" + } + 'UCSC.ce10' { + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" - mito_name = "chrM" macs_gsize = "9e7" + mito_name = "chrM" + readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" + star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" } - 'canFam3' { - fasta = 
"${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" + 'UCSC.CHM13' { + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2TCHM13v2.0/GCF_009914755.1_T2TCHM13v2.0_genomic.gff.gz" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" mito_name = "chrM" } - 'danRer10' { - fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" + 'UCSC.danRer10' { + bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" + 
fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" - mito_name = "chrM" macs_gsize = "1.37e9" + mito_name = "chrM" + star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" } - 'dm6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" + 'UCSC.dm6' { + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" - mito_name = "chrM" macs_gsize = "1.2e8" + mito_name = "chrM" + star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" } - 'equCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" + 'UCSC.equCab2' { + bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" mito_name = "chrM" + readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" + star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" } - 'galGal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" + 'UCSC.galGal4' { + bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" + mito_name = "chrM" readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" + star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" + } + 'UCSC.hg19' { + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" + macs_gsize = "2.7e9" mito_name = "chrM" + readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" } - 'panTro4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" + 'UCSC.hg38' { + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" + macs_gsize = "2.7e9" + mito_name = "chrM" + star = 
"${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + } + 'UCSC.mm10' { + bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" + macs_gsize = "1.87e9" + mito_name = "chrM" + readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" + star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + } + 'UCSC.panTro4' { + bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" mito_name = "chrM" + readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" + star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" } - 'rn6' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = 
"${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" + 'UCSC.rn6' { + bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" mito_name = "chrM" + star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" } - 'sacCer3' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" + 'UCSC.sacCer3' { bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" - readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" - mito_name = "chrM" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" macs_gsize = "1.2e7" + mito_name = "chrM" + readme = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" } - 'susScr3' { - fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" + 'UCSC.susScr3' { + bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" mito_name = "chrM" + readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" + star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" + } + 'GRCh38.chr21.testdata' { + fasta = "${params.igenomes_base}/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" + dict = "${params.igenomes_base}/genomics/homo_sapiens/genome/chr21/sequence/genome.dict" + fai = "${params.igenomes_base}/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" + mutect2_target_bed = "${params.igenomes_base}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + } + 'GRCh38.chr22.testdata' { + fasta = "${params.igenomes_base}/genomics/homo_sapiens/genome/genome.fasta" + dict = 
"${params.igenomes_base}/genomics/homo_sapiens/genome/genome.dict" + fai = "${params.igenomes_base}/genomics/homo_sapiens/genome/genome.fasta.fai" + gcnv_ploidy_priors = "${params.igenomes_base}/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" } } } diff --git a/conf/modules.config b/conf/modules/base.config similarity index 95% rename from conf/modules.config rename to conf/modules/base.config index d203d2b..b75cecd 100644 --- a/conf/modules.config +++ b/conf/modules/base.config @@ -11,17 +11,12 @@ */ process { - publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: FASTQC { - ext.args = '--quiet' - } - withName: 'MULTIQC' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ @@ -30,5 +25,4 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - } diff --git a/conf/modules/cnvkit.config b/conf/modules/cnvkit.config new file mode 100644 index 0000000..c0396a0 --- /dev/null +++ b/conf/modules/cnvkit.config @@ -0,0 +1,28 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+---------------------------------------------------------------------------------------- +*/ + +process { + withName: SAMTOOLS_VIEW { + ext.args = {"--output-fmt bam"} + } + + withName: CNVKIT_BATCH { + ext.args = {"--method wgs --output-reference ${meta.id}.cnn"} + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/cnvkit/" }, + pattern: "*{.cnn}" + ] + ] + } +} diff --git a/conf/modules/gens_pon.config b/conf/modules/gens_pon.config new file mode 100644 index 0000000..d7e7202 --- /dev/null +++ b/conf/modules/gens_pon.config @@ -0,0 +1,86 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + withName: '.*GENS_PON.*' { + publishDir = [ + enabled: false + ] + } + + withName: '.*GENS_PON:CAT_CAT' { + ext.prefix = { "${meta.id}_concat" } + } + + withName: '.*GENS_PON:GATK4_COLLECTREADCOUNTS' { + ext.args = { "--format ${params.gens_readcount_format} --imr OVERLAPPING_ONLY" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/gens_pon/readcounts" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } + + withName: '.*GENS_PON:INTERVAL_LIST_TO_BED' { + ext.prefix = { "gens_coverage_bins" } + ext.suffix = "bed" + ext.args2 = '\'BEGIN { FS=OFS="\t" } $2 < $3 { print $1, $2, $3 }\'' + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/references/intervals/gens_pon/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: '.*GENS_PON:GATK4_CREATEREADCOUNTPANELOFNORMALS' { + ext.args = { + [ + "--minimum-interval-median-percentile ${params.gens_min_interval_median_percentile}", + "--maximum-chunk-size ${params.gens_maximum_chunk_size}", + ].join(" ") + } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/gens_pon/createreadcountpanelofnormals" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: '.*GENS_PON:MOSDEPTH' { + ext.args = { + [ + '--no-per-base', + '--fast-mode', + ].join(' ') + } + } + + withName: '.*GENS_PON:MOSDEPTH_GATK_FORMAT' { + ext.prefix = { "${meta.id}_gatk_formatted" } + ext.suffix = "gatk_format.tsv" + ext.args = { "-v sample=${meta.id}" } + ext.args2 = '\'BEGIN { OFS="\\t" } { $4 = int($4 + 0.5); $2++; print $1, $2, $3, $4 }\'' + } + + withName: '.*GENS_PON:MOSDEPTH_GATK_HEADER' { + // This requires meta.id to be the same as the sample name in the BAM/CRAM header + ext.args = { "-v sample=${meta.id}" } + ext.args2 = '\'{ print } END { print "@RG\\tID:GATKCopyNumber\\tSM:" sample; print "CONTIG\\tSTART\\tEND\\tCOUNT" }\'' + ext.prefix = { "${meta.id}" } + ext.suffix = 'mosdepth_gatk_header.tsv' + } + + withName: '.*GENS_PON:SAMTOOLS_VIEW' { + ext.args = '-H --output-fmt sam' + ext.prefix = { "${meta.id}.mosdepth_gatk_header" } + } +} diff --git a/conf/modules/germlinecnvcaller_cohort.config b/conf/modules/germlinecnvcaller_cohort.config new file mode 100644 index 0000000..132ce0a --- /dev/null +++ b/conf/modules/germlinecnvcaller_cohort.config @@ -0,0 +1,84 @@ +/* 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + withName: '.*GERMLINECNVCALLER_COHORT.*' { + publishDir = [ + enabled: false + ] + } + + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_INDEXFEATUREFILE_MAPPABILITY' { + ext.when = { !params.gcnv_mappable_regions.equals(null) } + } + + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_INDEXFEATUREFILE_SEGDUP' { + ext.when = { !params.gcnv_segmental_duplications.equals(null) } + } + + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_BEDTOINTERVALLIST_TARGETS' { + ext.when = { params.gcnv_analysis_type.equals("wes") && params.gcnv_target_interval_list.equals(null) && params.gcnv_target_bed } + } + + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_BEDTOINTERVALLIST_EXCLUDE' { + ext.when = { params.gcnv_analysis_type.equals("wes") && params.gcnv_exclude_interval_list.equals(null) && params.gcnv_exclude_bed } + } + + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS' { + ext.args = { ["--imr OVERLAPPING_ONLY", + "--padding ${params.gcnv_padding}", + "--bin-length ${params.gcnv_bin_length}"].join(" ") + } + } + + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_COLLECTREADCOUNTS' { + ext.args = {"--format ${params.gcnv_readcount_format} --imr OVERLAPPING_ONLY"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/germlinecnvcaller/readcounts" }, + saveAs: { filename -> 
filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_ANNOTATEINTERVALS' { + ext.args = {"--imr OVERLAPPING_ONLY"} + ext.prefix = {" ${meta.id}_annotated"} + } + + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_FILTERINTERVALS' { + ext.args = {"--imr OVERLAPPING_ONLY"} + ext.prefix = {" ${meta.id}_filtered"} + } + + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_INTERVALLISTTOOLS' { + ext.args = {"--SUBDIVISION_MODE INTERVAL_COUNT --SCATTER_CONTENT ${params.gcnv_scatter_content}"} + } + + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_DETERMINEGERMLINECONTIGPLOIDY' { + ext.args = {"--imr OVERLAPPING_ONLY"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/germlinecnvcaller/determinegermlinecontigploidy" }, + pattern: "*-model", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_GERMLINECNVCALLER' { + ext.args = {"--imr OVERLAPPING_ONLY --run-mode COHORT"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/germlinecnvcaller/germlinecnvcaller" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/mutect2.config b/conf/modules/mutect2.config new file mode 100644 index 0000000..7e56a51 --- /dev/null +++ b/conf/modules/mutect2.config @@ -0,0 +1,17 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). 
+ ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + withName: '.*BAM_CREATE_SOM_PON_GATK:GATK4_MUTECT2' { + ext.args = "--max-mnp-distance 0" + } +} diff --git a/conf/modules/prepare_genome.config b/conf/modules/prepare_genome.config new file mode 100644 index 0000000..161ad9f --- /dev/null +++ b/conf/modules/prepare_genome.config @@ -0,0 +1,50 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + withName: 'BUILD_INTERVALS' { + ext.args = { "-v FS='\t' -v OFS='\t' '{ print \$1, \"0\", \$2 }'" } + ext.suffix = { "bed" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/intervals/mutect2_target_bed" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'GATK4_CREATESEQUENCEDICTIONARY' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/references" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'GATK4_PREPROCESSINTERVALS_GENS' { + ext.args = { ["--imr OVERLAPPING_ONLY", + "--bin-length ${params.gens_bin_length}"].join(" ") + } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/references/intervals/gens_pon/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'SAMTOOLS_FAIDX' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/references" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/test.config b/conf/test.config index 9575fae..079facb 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,8 +23,15 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'// Genome references - genome = 'R64-1-1' + input = "${projectDir}/tests/csv/1.0.0/bam.csv" + + // Main options + tools = 'cnvkit' + + //Germlinecnvcaller options + gcnv_scatter_content = 2 + + // Small reference genome + genome = 'GRCh38.chr21.testdata' + igenomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data' } diff --git a/conf/test_full.config b/conf/test_full.config index cb4a8c6..4120344 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,10 +15,9 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + input = "https://raw.githubusercontent.com/nf-core/test-datasets/createpanelrefs/csv/1.0/recal_cram_sarek.csv" + tools = "cnvkit,germlinecnvcaller,gens,mutect2" // Genome references - genome = 'R64-1-1' + genome = 'GATK.GRCh38' } diff --git a/docs/output.md b/docs/output.md index 5afcc1d..e2639a1 100644 --- a/docs/output.md +++ b/docs/output.md @@ -6,28 +6,95 @@ This document describes the output produced by the pipeline. Most of the plots a The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. - - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -- [FastQC](#fastqc) - Raw read QC +- [CNVKit](#cnvkit) - Create reference files for copy number variant detection from sequencing data. +- [GATK's germlinecnvcaller](#germlinecnvcaller) - Publish read counts, ploidy and cnvcalling models that can be used to call cnv's in the case mode. +- [GATK's Mutect2](#gatk-mutect2) - Create panel of normals for somatic variant calling. +- [GENS](#gens) - Create panel of normals for read-count denoising. - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution -### FastQC +### CNVKit + +
+Output files + +- `results/reference/cnvkit/` + - "panel.cnn": Panel reference file containing coverage information for copy number. + - ".antitargetcoverage.cnn": Antitarget coverage file for each sample. + - ".targetcoverage.cnn": Target coverage file for each sample. + +
+ +[CNVKit](https://cnvkit.readthedocs.io/en/stable/index.html) is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. +In this pipeline, CNVKit creates reference files that can be used for copy number variant detection. +The workflow processes normal samples to generate a reference CNN file that captures the baseline coverage patterns, which can then be used for tumor-only or tumor-normal CNV analysis in downstream applications. +The reference file contains coverage information normalized across the cohort and is essential for accurate copy number calling. + +### GATK germlinecnvcaller + +
+Output files + +- `results/germlinecnvcaller/` + - `determinegermlinecontigploidy` + - `cohort-model`: Contig ploidy model. + - `germlinecnvcaller` + - `*_model`: CNV caller model for each scattered shard. + - `readcounts` + - `*.hdf5|.tsv`: Read count statistics for each sample. + - `references` + - `*.dict`: Sequence dictionary file. This file is not published if the user supplies this file to the pipeline using the `--dict` parameter. + - `*.fai`: Fasta index file. This file is not published if the user supplies this file to the pipeline using the `--fai` parameter. + +
+ +[GATK](https://github.com/broadinstitute/gatk) is a toolkit which offers a wide variety of tools with a primary focus on variant discovery and genotyping. In this pipeline we have implemented GATK's germlinecnvcalling workflow for analysing a cohort of samples. The output files generated from this analysis can be used for analysing samples in case mode. For more information about the workflow and output files, see GATK's documentation [here.](https://gatk.broadinstitute.org/hc/en-us/articles/360035531152--How-to-Call-common-and-rare-germline-copy-number-variants) + +### GATK Mutect2
Output files -- `fastqc/` - - `*_fastqc.html`: FastQC report containing quality metrics. - - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +- `results/gatk4/` + - `mutect2/` + - `*.vcf.gz`: Compressed VCF files containing somatic variant calls for each sample. + - `*.vcf.gz.tbi`: Tabix index files for the VCF files. + - `*.vcf.gz.stats`: Statistics files containing detailed metrics for each sample. + - `*.f1r2.tar.gz`: Files containing information for LearnReadOrientationModel (only output when tumor-normal pair mode is run). + - `genomicsdb/` + - `{pon_name}/`: GenomicsDB workspace containing all sample VCFs combined. + - `createsomaticpanelofnormals/` + - `{pon_name}.vcf.gz`: Final panel of normals VCF file. + - `{pon_name}.vcf.gz.tbi`: Tabix index for the panel of normals VCF.
-[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). +[GATK Mutect2](https://gatk.broadinstitute.org/hc/en-us/articles/360035894731-Mutect2) creates a panel of normals from normal samples for somatic variant calling. The workflow: (1) calls variants in each normal sample using Mutect2 in panel of normals mode, (2) imports all VCFs into a GenomicsDB workspace, and (3) creates a final panel of normals VCF file. This panel can be used with Mutect2 in case mode via the `--panel-of-normals` parameter to filter out common germline variants and sequencing artifacts. + +### GENS + +
+Output files + +- `results/` + - `gens_pon/readcounts/` + - `*.hdf5`: Read count data in HDF5 format for each sample. + - `*.tsv`: Read count data in TSV format for each sample. + - `gens_pon/createreadcountpanelofnormals/` + - `{pon_name}.hdf5`: Final panel of normals file in HDF5 format. + - `references/intervals/gens_pon/` + - `*.interval_list`: Interval list file used for read count collection. + - `*.bed`: BED version of the interval list file used for read count collection for long reads. + +
+ +[GENS](https://github.com/Clinical-Genomics-Lund/gens) creates a panel of normals for read-count denoising to improve somatic variant detection. The workflow: (1) indexes BAM/CRAM files if needed, (2) collects read counts at specified intervals using GATK's CollectReadCounts, and (3) creates a panel of normals using GATK's CreateReadCountPanelOfNormals. This panel can be used with GENS for somatic variant calling to reduce technical noise and improve variant detection sensitivity. + +When `gens_analysis_type` is set to 'lrs', a modified version of the workflow above is run where coverage calculated by mosdepth is used instead of read counts. ### MultiQC diff --git a/docs/usage.md b/docs/usage.md index c548b62..c9d6da6 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,50 +6,35 @@ ## Introduction - - ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use the `--input` parameter to specify its location. It has to be a comma-separated file and recognizes the following fields as column headers. + +| Fields | Description | +| -------- | ------------------------------ | +| `sample` | Custom sample name. | +| `bam` | Alignment file in bam format. | +| `bai` | bam file index. | +| `cram` | Alignment file in cram format. | +| `crai` | cram file index. | ```bash --input '[path to samplesheet file]' ``` -### Multiple runs of the same sample - -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. 
Below is an example for the same sample sequenced across 3 lanes: - -```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz -``` - ### Full samplesheet -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. +The pipeline will auto-detect whether a sample is provided in BAM or CRAM format using the information provided in the samplesheet. The samplesheet can contain either BAM or CRAM files, with or without their indices. -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. +A final samplesheet file may look something like the one below. This is for 3 samples, where one sample, `SAMPLE_1`, is missing its index file. 
```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +sample,bam,bai +SAMPLE_1,sample1.bam, +SAMPLE_2,sample2.bam,sample2.bam.bai +SAMPLE_3,sample3.bam,sample3.bam.bai ``` -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | - An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. ## Running the pipeline @@ -57,10 +42,12 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/createpanelrefs --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker +nextflow run nf-core/createpanelrefs --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker --tools cnvkit,germlinecnvcaller ``` -This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. 
+This will launch the pipeline with the `docker` configuration profile, and generate reference files necessary for cnvkit and germlinecnvcaller. To learn more about what tool options are recognized by the pipeline, check the pipeline's documentation on the [nf-core website](https://nf-co.re/createpanelrefs/dev/parameters/). + +See below for more information about profiles. Note that the pipeline will create the following files in your working directory: @@ -116,6 +103,78 @@ To further assist in reproducibility, you can use share and reuse [parameter fil > [!TIP] > If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +## Workflow specific arguments + +### cnvkit + +If you are running the pipeline to generate references for the CNVkit variant calling workflow, you should consider that currently the default method for this pipeline is whole-genome. In order to use the CNVkit default, i.e. hybrid capture, when the user is creating a background for targeted capture sequencing (most commonly, exomes or panels), the user should + +1. provide an additional config file, in order to change or remove the method specified in the default `ext.args`, i.e. + +``` +process { + + withName: CNVKIT_BATCH { + ext.args = {"--output-reference ${meta.id}.cnn"} + } + +} +``` + +2. provide the `--cnvkit_targets` parameter (optional) as a .bed file for the targets + +### gens + +If you are running the pipeline to generate references for the GENS workflow, you should ensure that you have provided all the mandatory options specified in the table below. 
+ +| Mandatory | Optional | +| ------------- | --------------------- | +| fasta/genomes | fai | +| | dict | +| | gens_analysis_type | +| | gens_bin_length | +| | gens_pon_name | +| | gens_readcount_format | +| | gens_interval_list | + +The GENS workflow creates a panel of normals for read-count denoising from normal samples. This panel can be used with GENS for somatic variant calling to reduce technical noise and improve variant detection. For more information, see the [GENS documentation](https://github.com/Clinical-Genomics-Lund/gens). + +> [!NOTE] +> If `--gens_analysis_type` is set to 'lrs', this requires the sample ID set in the sample sheet to be equal to the `SM` tag in the BAM-file. + +### germlinecnvcaller + +If you are running the pipeline to generate references for the GATK's germlinecnvcalling workflow, you should ensure that you have provided all the mandatory options specified in the table below. + +| Mandatory | Optional | +| ------------------------------ | ------------------------------------------- | +| fasta/genomes | fai | +| gcnv_ploidy_priors<sup>1</sup> | dict | +| | gcnv_target_bed/gcnv_target_interval_list | +| | gcnv_exclude_bed/gcnv_exclude_interval_list | +| | gcnv_bin_length | +| | gcnv_mappable_regions | +| | gcnv_padding | +| | gcnv_model_name | +| | gcnv_readcount_format | +| | gcnv_scatter_content | +| | gcnv_segmental_duplications | + +<sup>1</sup> To learn more about this file, see [this comment](https://gatk.broadinstitute.org/hc/en-us/community/posts/360074399831/comments/13441240230299) on GATK forum.
+ +### mutect2 + +If you are running the pipeline to generate references for the GATK's Mutect2 somatic variant calling workflow, you should ensure that you have provided all the mandatory options specified in the table below. + +| Mandatory | Optional | +| ------------- | ------------------ | +| fasta/genomes | fai | +| | dict | +| | mutect2_target_bed | +| | mutect2_pon_name | + +The Mutect2 workflow creates a panel of normals from normal samples for somatic variant calling. This panel can be used with Mutect2 in case mode via the `--panel-of-normals` parameter to filter out common germline variants and sequencing artifacts. For more information, see the [GATK documentation](https://gatk.broadinstitute.org/hc/en-us/articles/360035894731-Mutect2). + ## Core Nextflow arguments > [!NOTE] @@ -149,7 +208,7 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - `shifter` - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - `charliecloud` - - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) + - A generic configuration profile to be used with [Charliecloud](https://charliecloud.io/) - `apptainer` - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `wave` diff --git a/main.nf b/main.nf index 5a261d3..2b41c60 100644 --- a/main.nf +++ b/main.nf @@ -11,94 +11,265 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS + GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { CREATEPANELREFS } from './workflows/createpanelrefs' -include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline' -include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline' 
-include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline' +params.dict = getGenomeAttribute('dict') +params.fai = getGenomeAttribute('fai') +params.fasta = getGenomeAttribute('fasta') +params.gcnv_exclude_bed = getGenomeAttribute('gcnv_exclude_bed') +params.gcnv_exclude_interval_list = getGenomeAttribute('gcnv_exclude_interval_list') +params.gcnv_mappable_regions = getGenomeAttribute('gcnv_mappable_regions') +params.gcnv_ploidy_priors = getGenomeAttribute('gcnv_ploidy_priors') +params.gcnv_segmental_duplications = getGenomeAttribute('gcnv_segmental_duplications') +params.gcnv_target_bed = getGenomeAttribute('gcnv_target_bed') +params.gcnv_target_interval_list = getGenomeAttribute('gcnv_target_interval_list') +params.gens_interval_list = getGenomeAttribute('gens_interval_list') +params.mutect2_target_bed = getGenomeAttribute('mutect2_target_bed') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// TODO nf-core: Remove this line if you don't need a FASTA file -// This is an example of how to use getGenomeAttribute() to fetch parameters -// from igenomes.config using `--genome` -params.fasta = getGenomeAttribute('fasta') +include { CREATEPANELREFS } from './workflows/createpanelrefs' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline' +include { PREPARE_GENOME } from './subworkflows/local/prepare_genome' +include { MULTIQC } from './modules/nf-core/multiqc' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from './subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from 
'./subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOWS FOR PIPELINE + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Run main analysis pipeline depending on type of input -// -workflow NFCORE_CREATEPANELREFS { +workflow { + versions = channel.empty() + multiqc_files = channel.empty() - take: - samplesheet // channel: samplesheet read in from --input + // Initialize file channels based on params, defined in the params.genomes[params.genome] scope + user_dict = params.dict + ? channel.fromPath(params.dict).map { dict -> [[id: 'genome'], dict] }.collect() + : channel.empty() - main: + user_fai = params.fai + ? channel.fromPath(params.fai).map { fai -> [[id: 'genome'], fai] }.collect() + : channel.empty() - // - // WORKFLOW: Run pipeline - // - CREATEPANELREFS ( - samplesheet - ) - emit: - multiqc_report = CREATEPANELREFS.out.multiqc_report // channel: /path/to/multiqc_report.html -} -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ + fasta = params.fasta + ? channel.fromPath(params.fasta).map { fasta -> [[id: 'genome'], fasta] }.collect() + : channel.empty() -workflow { + // Initialize cnvkit specific parameters + cnvkit_targets = params.cnvkit_targets + ? channel.fromPath(params.cnvkit_targets).map { targets -> [[id: 'genome'], targets] }.collect() + : channel.value([[id: 'genome'], []]) + + // Initialize gens interval list specific parameters + user_gens_interval_list = params.gens_interval_list + ? 
channel.fromPath(params.gens_interval_list).map { gens_interval_list -> [[id: 'genome'], gens_interval_list] }.collect() + : channel.empty() + + // Initialize germlinecnvcaller specific parameters + gcnv_exclude_bed = params.gcnv_exclude_bed + ? channel.fromPath(params.gcnv_exclude_bed).map { exclude -> [[id: 'genome'], exclude] }.collect() + : channel.value([[id: 'genome'], []]) + gcnv_exclude_interval_list = params.gcnv_exclude_interval_list + ? channel.fromPath(params.gcnv_exclude_interval_list).map { exclude -> [[id: 'genome'], exclude] }.collect() + : channel.value([[id: 'genome'], []]) + gcnv_mappable_regions = params.gcnv_mappable_regions + ? channel.fromPath(params.gcnv_mappable_regions).collect() + : channel.value([[id: 'genome'], []]) + gcnv_ploidy_priors = params.gcnv_ploidy_priors + ? channel.fromPath(params.gcnv_ploidy_priors).collect() + : channel.empty() + gcnv_target_bed = params.gcnv_target_bed + ? channel.fromPath(params.gcnv_target_bed).map { targets -> [[id: 'genome'], targets] }.collect() + : channel.value([[id: 'genome'], []]) + gcnv_target_interval_list = params.gcnv_target_interval_list + ? channel.fromPath(params.gcnv_target_interval_list).map { targets -> [[id: 'genome'], targets] }.collect() + : channel.value([[id: 'genome'], []]) + gcnv_segmental_duplications = params.gcnv_segmental_duplications + ? channel.fromPath(params.gcnv_segmental_duplications).collect() + : channel.value([[id: 'genome'], []]) + + // Initialize mutect2 specific parameters + user_mutect2_target_bed = params.mutect2_target_bed + ? 
channel.fromPath(params.mutect2_target_bed).map { targets -> [[id: 'genome'], targets] }.collect() + : channel.empty() - main: - // // SUBWORKFLOW: Run initialisation tasks - // - PIPELINE_INITIALISATION ( + PIPELINE_INITIALISATION( params.version, params.validate_params, params.monochrome_logs, args, params.outdir, - params.input + params.input, + params.help, + params.help_full, + params.show_hidden, ) - // + PREPARE_GENOME(fasta, user_dict, user_fai, user_gens_interval_list, user_mutect2_target_bed, params.tools ?: "no_tools") + + dict = PREPARE_GENOME.out.dict + fai = PREPARE_GENOME.out.fai + gens_interval_list = PREPARE_GENOME.out.gens_interval_list + mutect2_target_bed = PREPARE_GENOME.out.mutect2_target_bed + + multiqc_config = channel.fromPath("${projectDir}/assets/multiqc_config.yml", checkIfExists: true) + multiqc_custom_config = params.multiqc_config ? channel.fromPath(params.multiqc_config, checkIfExists: true) : channel.empty() + multiqc_logo = params.multiqc_logo ? channel.fromPath(params.multiqc_logo, checkIfExists: true) : channel.empty() + multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) + + versions = versions.mix(PREPARE_GENOME.out.versions) + // WORKFLOW: Run main workflow - // - NFCORE_CREATEPANELREFS ( - PIPELINE_INITIALISATION.out.samplesheet + NFCORE_CREATEPANELREFS( + PIPELINE_INITIALISATION.out.samplesheet, + params.tools ?: "no_tools", + params.gcnv_model_name, + params.gens_analysis_type, + params.gens_pon_name, + params.mutect2_pon_name, + fasta, + dict, + fai, + cnvkit_targets, + gcnv_exclude_bed, + gcnv_exclude_interval_list, + gcnv_mappable_regions, + gcnv_ploidy_priors, + gcnv_segmental_duplications, + gcnv_target_bed, + gcnv_target_interval_list, + gens_interval_list, + mutect2_target_bed, ) - // + + versions = versions.mix(NFCORE_CREATEPANELREFS.out.versions) + + // Collate and save software versions + collated_versions = softwareVersionsToYAML(versions).collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_createpanelrefs_software_mqc_versions.yml', + sort: true, + newLine: true, + ) + + // MODULE: MultiQC + multiqc_config = channel.fromPath( + "${projectDir}/assets/multiqc_config.yml", + checkIfExists: true + ) + multiqc_custom_config = params.multiqc_config + ? channel.fromPath(params.multiqc_config, checkIfExists: true) + : channel.empty() + multiqc_logo = params.multiqc_logo + ? channel.fromPath(params.multiqc_logo, checkIfExists: true) + : channel.empty() + + summary_params = paramsSummaryMap( + workflow, + parameters_schema: "nextflow_schema.json" + ) + workflow_summary = channel.value(paramsSummaryMultiqc(summary_params)) + multiqc_files = multiqc_files.mix( + workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') + ) + multiqc_custom_methods_description = params.multiqc_methods_description + ? 
file(params.multiqc_methods_description, checkIfExists: true) + : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) + methods_description = channel.value( + methodsDescriptionText(multiqc_custom_methods_description) + ) + + multiqc_files = multiqc_files.mix(collated_versions) + multiqc_files = multiqc_files.mix( + methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: true, + ) + ) + + MULTIQC( + multiqc_files.collect(), + multiqc_config.toList(), + multiqc_custom_config.toList(), + multiqc_logo.toList(), + [], + [], + ) + // SUBWORKFLOW: Run completion tasks - // - PIPELINE_COMPLETION ( + PIPELINE_COMPLETION( params.email, params.email_on_fail, params.plaintext_email, params.outdir, params.monochrome_logs, params.hook_url, - NFCORE_CREATEPANELREFS.out.multiqc_report + MULTIQC.out.report.toList(), ) } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END + NAMED WORKFLOWS FOR PIPELINE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + +// WORKFLOW: Run main analysis pipeline depending on type of input +workflow NFCORE_CREATEPANELREFS { + take: + samplesheet // channel: samplesheet read in from --input + tools // string: comma separated list of tools to run + gcnv_model_name // string: name of gcnv model + gens_analysis_type // string: type of analysis for gens pon ('lrs' or 'srs') + gens_pon_name // string: name of gens pon + mutect2_pon_name // string: name of mutect2 pon + fasta // channel: [meta, fasta] + dict // channel: [meta, dict] + fai // channel: [meta, fai] + cnvkit_targets // channel: [meta, cnvkit_targets] + gcnv_exclude_bed // channel: [meta, gcnv_exclude_bed] + gcnv_exclude_interval_list // channel: [meta, gcnv_exclude_interval_list] + gcnv_mappable_regions // channel: [meta, gcnv_mappable_regions] + gcnv_ploidy_priors // channel: [meta, gcnv_ploidy_priors] + gcnv_segmental_duplications // channel: 
[meta, gcnv_segmental_duplications] + gcnv_target_bed // channel: [meta, gcnv_target_bed] + gcnv_target_interval_list // channel: [meta, gcnv_target_interval_list] + gens_interval_list // channel: [meta, gens_interval_list] + mutect2_target_bed // channel: [meta, mutect2_target_bed] + + main: + // WORKFLOW: Run pipeline + CREATEPANELREFS(samplesheet, tools, gcnv_model_name, gens_analysis_type, gens_pon_name, mutect2_pon_name, fasta, dict, fai, cnvkit_targets, gcnv_exclude_bed, gcnv_exclude_interval_list, gcnv_mappable_regions, gcnv_ploidy_priors, gcnv_segmental_duplications, gcnv_target_bed, gcnv_target_interval_list, gens_interval_list, mutect2_target_bed) + + emit: + versions = CREATEPANELREFS.out.versions // channel: versions.yml +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + DEFINE FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Get attribute from genome config file e.g. 
fasta +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[params.genome].containsKey(attribute)) { + return params.genomes[params.genome][attribute] + } + } + return null +} diff --git a/modules.json b/modules.json index 2681663..96ef637 100644 --- a/modules.json +++ b/modules.json @@ -5,33 +5,138 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { - "fastqc": { + "cat/cat": { "branch": "master", - "git_sha": "08108058ea36a63f141c25c4e75f9f872a5b2296", + "git_sha": "69614d4579a6bd9b8a2ecffb35959809d9c36559", + "installed_by": ["modules"] + }, + "cnvkit/batch": { + "branch": "master", + "git_sha": "09223d6de1dab602242c4c57ab2a4599d460e528", + "installed_by": ["modules"] + }, + "gatk4/annotateintervals": { + "branch": "master", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", + "installed_by": ["modules"] + }, + "gatk4/bedtointervallist": { + "branch": "master", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", + "installed_by": ["modules"] + }, + "gatk4/collectreadcounts": { + "branch": "master", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", + "installed_by": ["modules"] + }, + "gatk4/createreadcountpanelofnormals": { + "branch": "master", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", + "installed_by": ["modules"] + }, + "gatk4/createsequencedictionary": { + "branch": "master", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", + "installed_by": ["modules"] + }, + "gatk4/createsomaticpanelofnormals": { + "branch": "master", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", + "installed_by": ["bam_create_som_pon_gatk", "modules"] + }, + "gatk4/determinegermlinecontigploidy": { + "branch": "master", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", + "installed_by": ["modules"] + }, + "gatk4/filterintervals": { + "branch": "master", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", + 
"installed_by": ["modules"] + }, + "gatk4/genomicsdbimport": { + "branch": "master", + "git_sha": "6baf1c4cb0e05d7167c3d9280a03972f7fcbbbcb", + "installed_by": ["bam_create_som_pon_gatk", "modules"] + }, + "gatk4/germlinecnvcaller": { + "branch": "master", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", + "installed_by": ["modules"] + }, + "gatk4/indexfeaturefile": { + "branch": "master", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", + "installed_by": ["modules"] + }, + "gatk4/intervallisttools": { + "branch": "master", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", + "installed_by": ["modules"] + }, + "gatk4/mutect2": { + "branch": "master", + "git_sha": "6baf1c4cb0e05d7167c3d9280a03972f7fcbbbcb", + "installed_by": ["bam_create_som_pon_gatk", "modules"] + }, + "gatk4/preprocessintervals": { + "branch": "master", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", + "installed_by": ["modules"] + }, + "gawk": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "mosdepth": { + "branch": "master", + "git_sha": "6832b69ef7f98c54876d6436360b6b945370c615", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "f0719ae309075ae4a291533883847c3f7c441dad", + "git_sha": "af27af1be706e6a2bb8fe454175b0cdf77f47b49", + "installed_by": ["modules"] + }, + "samtools/faidx": { + "branch": "master", + "git_sha": "c8be52dba1166c678e74cda9c3a3c221635c8bb1", + "installed_by": ["modules"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "c8be52dba1166c678e74cda9c3a3c221635c8bb1", + "installed_by": ["modules"] + }, + "samtools/view": { + "branch": "master", + "git_sha": "c8be52dba1166c678e74cda9c3a3c221635c8bb1", "installed_by": ["modules"] } } }, "subworkflows": { "nf-core": { + "bam_create_som_pon_gatk": { + "branch": "master", + "git_sha": "6baf1c4cb0e05d7167c3d9280a03972f7fcbbbcb", + "installed_by": ["subworkflows"] + }, "utils_nextflow_pipeline": { 
"branch": "master", - "git_sha": "c2b22d85f30a706a3073387f30380704fcae013b", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "51ae5406a030d4da1e49e4dab49756844fdd6c7a", + "git_sha": "271e7fc14eb1320364416d996fb077421f3faed2", "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", - "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", + "git_sha": "4b406a74dc0449c0401ed87d5bfff4252fd277fd", "installed_by": ["subworkflows"] } } diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml new file mode 100644 index 0000000..9851176 --- /dev/null +++ b/modules/nf-core/cat/cat/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf new file mode 100644 index 0000000..aa72fc4 --- /dev/null +++ b/modules/nf-core/cat/cat/main.nf @@ -0,0 +1,78 @@ +process CAT_CAT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pigz:2.8' : + 'biocontainers/pigz:2.8' }" + + input: + tuple val(meta), path(files_in) + + output: + tuple val(meta), path("${prefix}"), emit: file_out + tuple val("${task.process}"), val("pigz"), eval("pigz --version 2>&1 | sed 's/pigz //g'"), topic: versions, emit: versions_cat + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + + // choose appropriate concatenation tool depending on input and output format + + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + $command1 \\ + $args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." 
+ } + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} + +// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) +} diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml new file mode 100644 index 0000000..36a7359 --- /dev/null +++ b/modules/nf-core/cat/cat/meta.yml @@ -0,0 +1,63 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + documentation: https://man7.org/linux/man-pages/man1/cat.1.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" + ontologies: [] +output: + file_out: + - - meta: + type: map + description: Groovy Map containing sample information + - ${prefix}: + type: file + description: Concatenated file. 
Will be gzipped if file_out ends with + ".gz" + pattern: "${file_out}" + ontologies: [] + versions_cat: + - - ${task.process}: + type: string + description: The name of the process + - pigz: + type: string + description: The name of the tool + - "pigz --version 2>&1 | sed 's/pigz //g'": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - pigz: + type: string + description: The name of the tool + - "pigz --version 2>&1 | sed 's/pigz //g'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 0000000..030c664 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,192 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("sarscov2 - genome - error: name conflict") { + when { + process { + """ + input[0] = + [ + [ id:'genome', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ fasta, sizes ] - unzipped") { + when { + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: 
true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("sarscov2 - [ gff3_gz, maf_gz ] - zipped") { + when { + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() + } + ) + } + } + + test("sarscov2 - [ gff3_gz, maf_gz ] - unzipped") { + config './nextflow.config' + + when { + params { + cat_prefix = "cat.txt" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ fasta, sizes ] - zipped") { + config './nextflow.config' + + when { + params { + cat_prefix = "cat.txt.gz" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - fasta - zipped") { + config './nextflow.config' + + when { + params { + cat_prefix = "cat.txt.gz" + } + 
process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - fasta - unzipped - stub") { + options "-stub" + + when { + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 0000000..5b4e4cc --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,283 @@ +{ + "sarscov2 - [ gff3_gz, maf_gz ] - unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T09:08:31.479828" + }, + "sarscov2 - fasta - unzipped - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": 
"2025-12-10T16:16:28.118094" + }, + "sarscov2 - [ fasta, sizes ] - zipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T16:15:56.529595" + }, + "sarscov2 - genome - error: name conflict": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "file_out": [ + + ], + "versions_cat": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T16:14:54.496538" + }, + "sarscov2 - [ fasta, sizes ] - unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T11:26:29.942203" + }, + "sarscov2 - [ gff3_gz, maf_gz ] - zipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T11:26:45.679401" + }, + "sarscov2 - fasta - zipped": { + "content": 
[ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,6e9fe4042a72f2345f644f239272b7e6" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,6e9fe4042a72f2345f644f239272b7e6" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T16:16:12.439911" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/cat/tests/nextflow.config b/modules/nf-core/cat/cat/tests/nextflow.config new file mode 100644 index 0000000..5bc9bf5 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = "${params.cat_prefix}" + } +} diff --git a/modules/nf-core/cnvkit/batch/environment.yml b/modules/nf-core/cnvkit/batch/environment.yml new file mode 100644 index 0000000..76271e4 --- /dev/null +++ b/modules/nf-core/cnvkit/batch/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::cnvkit=0.9.12 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/cnvkit/batch/main.nf b/modules/nf-core/cnvkit/batch/main.nf new file mode 100644 index 0000000..2a81901 --- /dev/null +++ b/modules/nf-core/cnvkit/batch/main.nf @@ -0,0 +1,138 @@ +process CNVKIT_BATCH { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/3e/3e8542cdb0190cfe2cedd74f714f021a2ffa94be3ec2a5b95ff52610cb3e2c34/data' + : 'community.wave.seqera.io/library/cnvkit_htslib_samtools:86928c121163aca7'}" + + input: + tuple val(meta), path(tumor), path(normal) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fasta_fai) + tuple val(meta4), path(targets) + tuple val(meta5), path(reference) + val panel_of_normals + + output: + tuple val(meta), path("*.bed"), emit: bed + tuple val(meta), path("*.cnn"), emit: cnn, optional: true + tuple val(meta), path("*.cnr"), emit: cnr, optional: true + tuple val(meta), path("*.cns"), emit: cns, optional: true + tuple val(meta), path("*.pdf"), emit: pdf, optional: true + tuple val(meta), path("*.png"), emit: png, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + script: + def args = task.ext.args ?: '' + def tumor_exists = tumor ? true : false + def normal_exists = normal ? true : false + def reference_exists = reference ? true : false + // execute samtools only when cram files are input, cnvkit runs natively on cram but is prohibitively slow + def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false + def normal_cram = normal_exists && normal.Extension == "cram" ? true : false + + def tumor_out = tumor_cram ? tumor.BaseName + ".bam" : "${tumor}" + + // tumor_only mode does not need fasta & target + // instead a pre-computed reference.cnn may be supplied which is built from fasta & target + def (normal_out, normal_args, fasta_args) = ["", "", ""] + def fai_reference = fasta_fai ? "--fai-reference ${fasta_fai}" : "" + + if (normal_exists) { + def normal_prefix = normal.BaseName + normal_out = normal_cram ? "${normal_prefix}" + ".bam" : "${normal}" + fasta_args = fasta ? 
"--fasta ${fasta}" : "" + + // germline mode + // normal samples must be input without a flag + // requires flag --normal to be empty [] + if (!tumor_exists) { + tumor_out = "${normal_prefix}" + ".bam" + normal_args = "--normal " + } + else { + normal_args = normal_prefix ? "--normal ${normal_out}" : "" + } + if (reference_exists) { + fasta_args = "" + normal_args = "" + } + } + // generation of panel of normals + def generate_pon = panel_of_normals ? true : false + + if (generate_pon && !tumor_exists) { + def pon_input = normal.join(' ') + normal_args = "--normal ${pon_input}" + tumor_out = "" + } + + // tumor_only mode and no reference + // generate a "flat" reference which assumes equal coverage + // by passing '--normal' without any files + if (!reference_exists & !normal_exists & tumor_exists) { + normal_args = normal_args ?: "--normal" + } + + def target_args = targets && !reference_exists ? "--targets ${targets}" : "" + def reference_args = reference ? "--reference ${reference}" : "" + + def samtools_cram_convert = '' + samtools_cram_convert += normal_cram ? " samtools view -T ${fasta} ${fai_reference} ${normal} -@ ${task.cpus} -o ${normal_out}\n" : '' + samtools_cram_convert += normal_cram ? " samtools index ${normal_out}\n" : '' + samtools_cram_convert += tumor_cram ? " samtools view -T ${fasta} ${fai_reference} ${tumor} -@ ${task.cpus} -o ${tumor_out}\n" : '' + samtools_cram_convert += tumor_cram ? " samtools index ${tumor_out}\n" : '' + def versions = normal_cram || tumor_cram + ? 
"samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')\n cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g')" + : "cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g')" + """ + ${samtools_cram_convert} + cnvkit.py \\ + batch \\ + ${tumor_out} \\ + ${normal_args} \\ + ${fasta_args} \\ + ${reference_args} \\ + ${target_args} \\ + --processes ${task.cpus} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ${versions} + END_VERSIONS + """ + stub: + def tumor_exists = tumor ? true : false + def normal_exists = normal ? true : false + def reference_exists = reference ? true : false + // identify BED naming pattern + def bed_prefix = reference_exists ? reference.BaseName : targets ? targets.BaseName : "" + def bed_suffix = reference_exists ? "-tmp.bed" : ".bed" + // execute samtools only when cram files are input, cnvkit runs natively on cram but is prohibitively slow + def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false + def normal_cram = normal_exists && normal.Extension == "cram" ? true : false + def out_base_name = tumor_exists ? tumor.BaseName : normal.BaseName + def versions = normal_cram || tumor_cram + ? 
"samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')\n cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g')" + : "cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g')" + """ + touch ${bed_prefix}.antitarget${bed_suffix} + touch ${bed_prefix}.target${bed_suffix} + touch "reference.cnn" + touch ${out_base_name}.antitargetcoverage.cnn + touch ${out_base_name}.bintest.cns + touch ${out_base_name}.call.cns + touch ${out_base_name}.cnr + touch ${out_base_name}.cns + touch ${out_base_name}.targetcoverage.cnn + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ${versions} + END_VERSIONS + """ +} diff --git a/modules/nf-core/cnvkit/batch/meta.yml b/modules/nf-core/cnvkit/batch/meta.yml new file mode 100644 index 0000000..d4ee78e --- /dev/null +++ b/modules/nf-core/cnvkit/batch/meta.yml @@ -0,0 +1,170 @@ +name: cnvkit_batch +description: Copy number variant detection from high-throughput sequencing data +keywords: + - cnvkit + - bam + - fasta + - copy number +tools: + - cnvkit: + description: | + CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent. + homepage: https://cnvkit.readthedocs.io/en/stable/index.html + documentation: https://cnvkit.readthedocs.io/en/stable/index.html + licence: ["Apache-2.0"] + identifier: biotools:cnvkit +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tumor: + type: file + description: | + Input tumour sample bam file (or cram) + ontologies: [] + - normal: + type: file + description: | + Input normal sample bam file (or cram) + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - fasta: + type: file + description: | + Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided) + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta_fai: + type: file + description: | + Input reference genome fasta index (optional, but recommended for cram_input) + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing information about target file + e.g. [ id:'test' ] + - targets: + type: file + description: | + Input target bed file + ontologies: [] + - - meta5: + type: map + description: | + Groovy Map containing information about reference file + e.g. [ id:'test' ] + - reference: + type: file + description: | + Input reference cnn-file (only for germline and tumor-only running) + ontologies: [] + - panel_of_normals: + type: file + description: | + Input panel of normals file + ontologies: [] +output: + bed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bed": + type: file + description: File containing genomic regions + pattern: "*.{bed}" + ontologies: [] + cnn: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cnn": + type: file + description: File containing coverage information + pattern: "*.{cnn}" + ontologies: [] + cnr: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cnr": + type: file + description: File containing copy number ratio information + pattern: "*.{cnr}" + ontologies: [] + cns: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.cns": + type: file + description: File containing copy number segment information + pattern: "*.{cns}" + ontologies: [] + pdf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.pdf": + type: file + description: File with plot of copy numbers or segments on chromosomes + pattern: "*.{pdf}" + ontologies: [] + png: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.png": + type: file + description: File with plot of bin-level log2 coverages and segmentation calls + pattern: "*.{png}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@adamrtalbot" + - "@drpatelh" + - "@fbdtemme" + - "@kaurravneet4123" + - "@KevinMenden" + - "@lassefolkersen" + - "@MaxUlysse" + - "@priesgo" + - "@SusiJo" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@fbdtemme" + - "@kaurravneet4123" + - "@KevinMenden" + - "@lassefolkersen" + - "@MaxUlysse" + - "@priesgo" + - "@SusiJo" diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml deleted file mode 100644 index 691d4c7..0000000 --- a/modules/nf-core/fastqc/environment.yml +++ /dev/null @@ -1,5 +0,0 @@ -channels: - - conda-forge - - bioconda -dependencies: - - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf deleted file mode 100644 index 033f415..0000000 --- a/modules/nf-core/fastqc/main.nf +++ /dev/null @@ -1,64 +0,0 @@ -process FASTQC { - tag "${meta.id}" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : - 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // Make list of old name and new name pairs to use for renaming in the bash while loop - def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } - def rename_to = old_new_pairs*.join(' ').join(' ') - def renamed_files = old_new_pairs.collect{ _old_name, new_name -> new_name }.join(' ') - - // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) - // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 - // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label - def memory_in_mb = task.memory ? task.memory.toUnit('MB').toFloat() / task.cpus : null - // FastQC memory value allowed range (100 - 10000) - def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 
100 : memory_in_mb) - - """ - printf "%s %s\\n" ${rename_to} | while read old_name new_name; do - [ -f "\${new_name}" ] || ln -s \$old_name \$new_name - done - - fastqc \\ - ${args} \\ - --threads ${task.cpus} \\ - --memory ${fastqc_memory} \\ - ${renamed_files} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.html - touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml deleted file mode 100644 index 2b2e62b..0000000 --- a/modules/nf-core/fastqc/meta.yml +++ /dev/null @@ -1,67 +0,0 @@ -name: fastqc -description: Run FastQC on sequenced reads -keywords: - - quality control - - qc - - adapters - - fastq -tools: - - fastqc: - description: | - FastQC gives general quality metrics about your reads. - It provides information about the quality score distribution - across your reads, the per base sequence content (%A/C/G/T). - - You get information about adapter contamination and other - overrepresented sequences. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ - documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ - licence: ["GPL-2.0-only"] - identifier: biotools:fastqc -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. -output: - - html: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - "*.html": - type: file - description: FastQC report - pattern: "*_{fastqc.html}" - - zip: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*.zip": - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" -maintainers: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test deleted file mode 100644 index e9d79a0..0000000 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ /dev/null @@ -1,309 +0,0 @@ -nextflow_process { - - name "Test Process FASTQC" - script "../main.nf" - process "FASTQC" - - tag "modules" - tag "modules_nfcore" - tag "fastqc" - - test("sarscov2 single-end [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [ id: 'test', single_end:true ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. - // looks like this:
Mon 2 Oct 2023
test.gz
- // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 paired-end [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, - { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, - { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, - { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, - { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 interleaved [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 paired-end [bam]") { 
- - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 multiple [fastq]") { - - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, - { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, - { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, - { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, - { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, - { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, - { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, - { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, - { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, 
- { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, - { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 custom_prefix") { - - when { - process { - """ - input[0] = Channel.of([ - [ id:'mysample', single_end:true ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } - ) - } - } - - test("sarscov2 single-end [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [ id: 'test', single_end:true ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 paired-end [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 interleaved [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 paired-end [bam] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 multiple [fastq] - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [id: 'test', single_end: false], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 custom_prefix - stub") { - - options "-stub" - when { - process { - """ - input[0] = Channel.of([ - [ id:'mysample', single_end:true ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ]) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } -} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap deleted file mode 100644 index 
d5db309..0000000 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ /dev/null @@ -1,392 +0,0 @@ -{ - "sarscov2 custom_prefix": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:16.374038" - }, - "sarscov2 single-end [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": true - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": true - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": true - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:24.993809" - }, - "sarscov2 custom_prefix - stub": { - "content": [ - { - "0": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "mysample", - "single_end": true - }, - "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:03:10.93942" - }, - "sarscov2 interleaved [fastq]": { - 
"content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:01:42.355718" - }, - "sarscov2 paired-end [bam]": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:01:53.276274" - }, - "sarscov2 multiple [fastq]": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:05.527626" - }, - "sarscov2 paired-end [fastq]": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:01:31.188871" - }, - "sarscov2 paired-end [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:34.273566" - }, - "sarscov2 multiple [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - 
"versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:03:02.304411" - }, - "sarscov2 single-end [fastq]": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:01:19.095607" - }, - "sarscov2 interleaved [fastq] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:44.640184" - }, - "sarscov2 paired-end [bam] - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "html": [ - [ - { - "id": "test", - "single_end": false - }, - 
"test.html:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ], - "zip": [ - [ - { - "id": "test", - "single_end": false - }, - "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-07-22T11:02:53.550742" - } -} \ No newline at end of file diff --git a/modules/nf-core/gatk4/annotateintervals/environment.yml b/modules/nf-core/gatk4/annotateintervals/environment.yml new file mode 100644 index 0000000..67e0eb8 --- /dev/null +++ b/modules/nf-core/gatk4/annotateintervals/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/annotateintervals/main.nf b/modules/nf-core/gatk4/annotateintervals/main.nf new file mode 100644 index 0000000..50556d5 --- /dev/null +++ b/modules/nf-core/gatk4/annotateintervals/main.nf @@ -0,0 +1,70 @@ +process GATK4_ANNOTATEINTERVALS { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" + + input: + tuple val(meta), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fasta_fai) + tuple val(meta4), path(dict) + tuple val(meta5), path(mappable_regions) + tuple val(meta6), path(mappable_regions_tbi) + tuple val(meta7), path(segmental_duplication_regions) + tuple val(meta8), path(segmental_duplication_regions_tbi) + + output: + tuple val(meta), path("*.tsv"), emit: annotated_intervals + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def inputs = intervals.collect { "--intervals ${it}" }.join(" ") + def mappability_track = mappable_regions ? "--mappability-track ${mappable_regions}" : "" + def segmental_duplication_tracks = segmental_duplication_regions ? "--segmental-duplication-track ${segmental_duplication_regions}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK AnnotateIntervals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + AnnotateIntervals \\ + ${inputs} \\ + --reference ${fasta} \\ + --output ${prefix}.tsv \\ + ${mappability_track} \\ + ${segmental_duplication_tracks} \\ + --tmp-dir . 
\\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/annotateintervals/meta.yml b/modules/nf-core/gatk4/annotateintervals/meta.yml new file mode 100644 index 0000000..7c6de2f --- /dev/null +++ b/modules/nf-core/gatk4/annotateintervals/meta.yml @@ -0,0 +1,129 @@ +name: "gatk4_annotateintervals" +description: Annotates intervals with GC content, mappability, and segmental-duplication + content +keywords: + - annotateintervals + - annotation + - bed + - gatk4 + - intervals +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals: + type: file + description: One or more interval files to annotate + pattern: "*.{interval_list,list,bed}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: The reference FASTA file + pattern: "*.{fasta,fa}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - fasta_fai: + type: file + description: The index of the reference FASTA file + pattern: "*.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - dict: + type: file + description: The sequence dictionary reference FASTA file + pattern: "*.dict" + ontologies: [] + - - meta5: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - mappable_regions: + type: file + description: | + Optional - Umap single-read mappability track + The track should correspond to the appropriate read length and overlapping intervals must be merged + pattern: "*.bed(.gz)?" + ontologies: [] + - - meta6: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - mappable_regions_tbi: + type: file + description: Optional - The index of the gzipped umap single-read mappability + track + pattern: "*.bed.gz.tbi" + ontologies: [] + - - meta7: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - segmental_duplication_regions: + type: file + description: Optional - Segmental-duplication track + pattern: "*.bed(.gz)?" + ontologies: [] + - - meta8: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - segmental_duplication_regions_tbi: + type: file + description: Optional - The index of the gzipped segmental-duplication track + pattern: "*.bed.gz.tbi" + ontologies: [] +output: + annotated_intervals: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.tsv": + type: file + description: The output TSV file with a SAM-style header containing the annotated + intervals + pattern: "*.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/gatk4/bedtointervallist/environment.yml b/modules/nf-core/gatk4/bedtointervallist/environment.yml new file mode 100644 index 0000000..67e0eb8 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/bedtointervallist/main.nf b/modules/nf-core/gatk4/bedtointervallist/main.nf new file mode 100644 index 0000000..339e11c --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/main.nf @@ -0,0 +1,57 @@ +process GATK4_BEDTOINTERVALLIST { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" + + input: + tuple val(meta), path(bed) + tuple val(meta2), path(dict) + + output: + tuple val(meta), path('*.interval_list'), emit: interval_list + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK BedToIntervalList] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + BedToIntervalList \\ + --INPUT ${bed} \\ + --OUTPUT ${prefix}.interval_list \\ + --SEQUENCE_DICTIONARY ${dict} \\ + --TMP_DIR . \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.interval_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/bedtointervallist/meta.yml b/modules/nf-core/gatk4/bedtointervallist/meta.yml new file mode 100644 index 0000000..b186132 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/meta.yml @@ -0,0 +1,64 @@ +name: gatk4_bedtointervallist +description: Creates an interval list from a bed file and a reference dict +keywords: + - bed + - bedtointervallist + - gatk4 + - interval list +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a 
primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - bed: + type: file + description: Input bed file + pattern: "*.bed" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - dict: + type: file + description: Sequence dictionary + pattern: "*.dict" + ontologies: [] +output: + interval_list: + - - meta: + type: file + description: gatk interval list file + pattern: "*.interval_list" + ontologies: [] + - "*.interval_list": + type: file + description: gatk interval list file + pattern: "*.interval_list" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@kevinmenden" + - "@ramprasadn" +maintainers: + - "@kevinmenden" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/collectreadcounts/environment.yml b/modules/nf-core/gatk4/collectreadcounts/environment.yml new file mode 100644 index 0000000..67e0eb8 --- /dev/null +++ b/modules/nf-core/gatk4/collectreadcounts/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git 
a/modules/nf-core/gatk4/collectreadcounts/main.nf b/modules/nf-core/gatk4/collectreadcounts/main.nf new file mode 100644 index 0000000..90083d8 --- /dev/null +++ b/modules/nf-core/gatk4/collectreadcounts/main.nf @@ -0,0 +1,74 @@ +process GATK4_COLLECTREADCOUNTS { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" + + input: + tuple val(meta), path(input), path(input_index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + + output: + tuple val(meta), path("*.hdf5"), emit: hdf5, optional: true + tuple val(meta), path("*.tsv"), emit: tsv, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def reference = fasta ? "--reference ${fasta}" : "" + def extension = args.contains("--format HDF5") + ? "hdf5" + : args.contains("--format TSV") + ? "tsv" + : "hdf5" + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK COLLECTREADCOUNTS] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CollectReadCounts \\ + --input ${input} \\ + --intervals ${intervals} \\ + --output ${prefix}.${extension} \\ + ${reference} \\ + --tmp-dir . 
\\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--format HDF5") + ? "hdf5" + : args.contains("--format TSV") + ? "tsv" + : "hdf5" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/collectreadcounts/meta.yml b/modules/nf-core/gatk4/collectreadcounts/meta.yml new file mode 100644 index 0000000..7370d01 --- /dev/null +++ b/modules/nf-core/gatk4/collectreadcounts/meta.yml @@ -0,0 +1,107 @@ +name: "gatk4_collectreadcounts" +description: Collects read counts at specified intervals. The count for each interval + is calculated by counting the number of read starts that lie in the interval. +keywords: + - collectreadcounts + - bam + - cram + - gatk4 +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform + at the Broad Institute, the toolkit offers a wide variety of tools with a primary + focus on variant discovery and genotyping. Its powerful processing engine and + high-performance computing features make it capable of taking on projects of + any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037593911-CombineGVCFs + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - input_index: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + - intervals: + type: file + description: A file containing the specified intervals + pattern: "*.{bed,intervals}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Optional - Reference FASTA + pattern: "*.{fasta,fa}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: Optional - Index of the reference FASTA file + pattern: "*.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - dict: + type: file + description: Optional - Sequence dictionary of the reference FASTA file + pattern: "*.dict" + ontologies: [] +output: + hdf5: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.hdf5": + type: file + description: The read counts in hdf5 format + pattern: "*.hdf5" + ontologies: [] + tsv: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.tsv": + type: file + description: The read counts in TSV format + pattern: "*.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml b/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml new file mode 100644 index 0000000..67e0eb8 --- /dev/null +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf b/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf new file mode 100644 index 0000000..48c927b --- /dev/null +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf @@ -0,0 +1,55 @@ +process GATK4_CREATEREADCOUNTPANELOFNORMALS { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" + + input: + tuple val(meta), path(counts) + + output: + tuple val(meta), path("*.hdf5"), emit: pon + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_list = counts.collect { "--input ${it}" }.join(" ") + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK CreateReadCountPanelOfNormals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CreateReadCountPanelOfNormals \\ + ${args} \\ + ${input_list} \\ + --output ${prefix}.hdf5 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.hdf5 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml b/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml new file mode 100644 index 0000000..66fe086 --- /dev/null +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml @@ -0,0 +1,54 @@ +name: "gatk4_createreadcountpanelofnormals" +description: Creates a panel of normals (PoN) for read-count denoising given the read + counts for samples in the panel. 
+keywords: + - createreadcountpanelofnormals + - gatk4 + - panelofnormals +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + tool_dev_url: "https://github.com/broadinstitute/gatk" + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - counts: + type: file + description: Read counts in hdf5 or tsv format. + pattern: "*.{hdf5,tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV +output: + pon: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.hdf5": + type: file + description: Panel-of-normals file. 
+ pattern: "*.{hdf5}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/createsequencedictionary/environment.yml b/modules/nf-core/gatk4/createsequencedictionary/environment.yml new file mode 100644 index 0000000..67e0eb8 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/createsequencedictionary/main.nf b/modules/nf-core/gatk4/createsequencedictionary/main.nf new file mode 100644 index 0000000..a807400 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/main.nf @@ -0,0 +1,53 @@ +process GATK4_CREATESEQUENCEDICTIONARY { + tag "${fasta}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('*.dict'), emit: dict + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def avail_mem = 6144 + if (!task.memory) { + log.info('[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CreateSequenceDictionary \\ + --REFERENCE ${fasta} \\ + --URI ${fasta} \\ + --TMP_DIR . \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta.baseName}.dict + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/gatk4/createsequencedictionary/meta.yml new file mode 100644 index 0000000..72dced2 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/meta.yml @@ -0,0 +1,54 @@ +name: gatk4_createsequencedictionary +description: Creates a sequence dictionary for a reference sequence +keywords: + - createsequencedictionary + - dictionary + - fasta + - gatk4 +tools: + - gatk: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. 
Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Input fasta file + pattern: "*.{fasta,fa}" + ontologies: [] +output: + dict: + - - meta: + type: file + description: gatk dictionary file + pattern: "*.{dict}" + ontologies: [] + - "*.dict": + type: file + description: gatk dictionary file + pattern: "*.{dict}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@maxulysse" + - "@ramprasadn" +maintainers: + - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml b/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml new file mode 100644 index 0000000..67e0eb8 --- /dev/null +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf b/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf new file mode 100644 index 0000000..ebb5fe2 --- /dev/null +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf @@ -0,0 +1,61 @@ +process 
GATK4_CREATESOMATICPANELOFNORMALS { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" + + input: + tuple val(meta), path(genomicsdb) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.tbi"), emit: tbi + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK CreateSomaticPanelOfNormals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CreateSomaticPanelOfNormals \\ + --variant gendb://${genomicsdb} \\ + --output ${prefix}.vcf.gz \\ + --reference ${fasta} \\ + --tmp-dir . 
\\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "${prefix}" | gzip -c > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml b/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml new file mode 100644 index 0000000..b09e1a3 --- /dev/null +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml @@ -0,0 +1,88 @@ +name: gatk4_createsomaticpanelofnormals +description: Create a panel of normals constraining germline and artifactual sites + for use with mutect2. +keywords: + - createsomaticpanelofnormals + - gatk4 + - panelofnormals +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - genomicsdb: + type: file + description: GenomicsDB database + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test'] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test'] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test'] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: A Groovy map containing sample information + - "*.vcf.gz": + type: file + description: panel of normal as compressed vcf file + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + tbi: + - - meta: + type: map + description: A Groovy map containing sample information + - "*.tbi": + type: file + description: Tabix index of vcf file + pattern: "*vcf.gz.tbi" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/environment.yml b/modules/nf-core/gatk4/determinegermlinecontigploidy/environment.yml new file mode 100644 index 0000000..67e0eb8 --- /dev/null +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf b/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf new file mode 100644 index 0000000..4db4969 --- /dev/null +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf @@ -0,0 +1,74 @@ +process 
GATK4_DETERMINEGERMLINECONTIGPLOIDY { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" + + input: + tuple val(meta), path(counts), path(bed), path(exclude_beds) + tuple val(meta2), path(ploidy_model) + path contig_ploidy_table + + output: + tuple val(meta), path("${prefix}-calls"), emit: calls + tuple val(meta), path("${prefix}-model"), emit: model, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def intervals = bed ? "--intervals ${bed}" : "" + def exclude = exclude_beds ? exclude_beds.collect { "--exclude-intervals ${it}" }.join(" ") : "" + def contig_ploidy = contig_ploidy_table ? "--contig-ploidy-priors ${contig_ploidy_table}" : "" + def model = ploidy_model ? "--model ${ploidy_model}" : "" + def input_list = counts.collect { "--input ${it}" }.join(" ") + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK DetermineGermlineContigPloidy] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + export THEANO_FLAGS="base_compiledir=\$PWD" + export PYTENSOR_FLAGS="base_compiledir=\$PWD" + export OMP_NUM_THREADS=${task.cpus} + export MKL_NUM_THREADS=${task.cpus} + + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + DetermineGermlineContigPloidy \\ + ${input_list} \\ + --output ./ \\ + --output-prefix ${prefix} \\ + ${intervals} \\ + ${exclude} \\ + ${contig_ploidy} \\ + ${model} \\ + --tmp-dir . 
\\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}-calls + touch ${prefix}-model + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml b/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml new file mode 100644 index 0000000..45e7176 --- /dev/null +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml @@ -0,0 +1,94 @@ +name: "gatk4_determinegermlinecontigploidy" +description: Determines the baseline contig ploidy for germline samples given counts + data +keywords: + - copy number + - counts + - determinegermlinecontigploidy + - gatk4 +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - counts: + type: file + description: One or more count TSV files created with gatk/collectreadcounts + pattern: "*.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + - bed: + type: file + description: Optional - A bed file containing the intervals to include in the + process + pattern: "*.bed" + ontologies: [] + - exclude_beds: + type: file + description: Optional - One or more bed files containing intervals to exclude + from the process + pattern: "*.bed" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ploidy_model: + type: directory + description: | + Optional - A folder containing the ploidy model. + When a model is supplied, the tool will run in CASE mode. + pattern: '*-model/' + - contig_ploidy_table: + type: file + description: The contig ploidy priors table + pattern: "*.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV +output: + calls: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}-calls: + type: directory + description: A folder containing the calls from the input files + pattern: "*-calls/" + model: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}-model: + type: directory + description: | + A folder containing the model from the input files. + This will only be created in COHORT mode (when no model is supplied to the process).
+ pattern: "*-model/" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/gatk4/filterintervals/environment.yml b/modules/nf-core/gatk4/filterintervals/environment.yml new file mode 100644 index 0000000..67e0eb8 --- /dev/null +++ b/modules/nf-core/gatk4/filterintervals/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/filterintervals/main.nf b/modules/nf-core/gatk4/filterintervals/main.nf new file mode 100644 index 0000000..7733bd8 --- /dev/null +++ b/modules/nf-core/gatk4/filterintervals/main.nf @@ -0,0 +1,60 @@ +process GATK4_FILTERINTERVALS { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" + + input: + tuple val(meta), path(intervals) + tuple val(meta2), path(read_counts) + tuple val(meta3), path(annotated_intervals) + + output: + tuple val(meta), path("*.interval_list"), emit: interval_list + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def annotated_command = annotated_intervals ? 
"--annotated-intervals ${annotated_intervals}" : "" + def read_counts_command = read_counts ? read_counts.collect { "--input ${it}" }.join(" ") : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK FilterIntervals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + FilterIntervals \\ + ${annotated_command} \\ + ${read_counts_command} \\ + --intervals ${intervals} \\ + --output ${prefix}.interval_list \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.interval_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/filterintervals/meta.yml b/modules/nf-core/gatk4/filterintervals/meta.yml new file mode 100644 index 0000000..b897744 --- /dev/null +++ b/modules/nf-core/gatk4/filterintervals/meta.yml @@ -0,0 +1,75 @@ +name: "gatk4_filterintervals" +description: Filters intervals based on annotations and/or count statistics. +keywords: + - filterintervals + - gatk4 + - interval_list +tools: + - "gatk4": + description: Developed in the Data Sciences Platform at the Broad Institute, the + toolkit offers a wide variety of tools with a primary focus on variant discovery + and genotyping. Its powerful processing engine and high-performance computing + features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - intervals: + type: file + description: Processed interval list file (processed_intervals.interval_list) + pattern: "*.interval_list" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - read_counts: + type: file + description: Read counts input file + pattern: "*.{tsv,hdf5}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - annotated_intervals: + type: file + description: Annotated intervals TSV file (annotated_intervals.tsv). + pattern: "*.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV +output: + interval_list: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*.interval_list": + type: file + description: Filtered interval list file + pattern: "*.interval_list" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@ryanjameskennedy" + - "@ViktorHy" +maintainers: + - "@ryanjameskennedy" + - "@ViktorHy" diff --git a/modules/nf-core/gatk4/genomicsdbimport/environment.yml b/modules/nf-core/gatk4/genomicsdbimport/environment.yml new file mode 100644 index 0000000..67e0eb8 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/genomicsdbimport/main.nf b/modules/nf-core/gatk4/genomicsdbimport/main.nf new file mode 100644 index 0000000..0b3341b --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/main.nf @@ -0,0 +1,105 @@ +process GATK4_GENOMICSDBIMPORT { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" + + input: + tuple val(meta), path(vcf), path(tbi), path(interval_file), val(interval_value), path(wspace) + val run_intlist + val run_updatewspace + val input_map + + output: + tuple val(meta), path("${prefix}"), emit: genomicsdb, optional: true + tuple val(meta), path("${updated_db}"), emit: updatedb, optional: true + tuple val(meta), path("*.interval_list"), emit: intervallist, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + // settings for running default create gendb mode + input_command = input_map ? "--sample-name-map ${vcf[0]}" : vcf.collect { vcf_ -> "--variant ${vcf_}" }.join(' ') + + genomicsdb_command = "--genomicsdb-workspace-path ${prefix}" + interval_command = interval_file ? "--intervals ${interval_file}" : "--intervals ${interval_value}" + updated_db = "" + + // settings changed for running get intervals list mode if run_intlist is true + if (run_intlist) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = "--output-interval-list-to-file ${prefix}.interval_list" + } + + // settings changed for running update gendb mode. input_command same as default, update_db forces module to emit the updated gendb + if (run_updatewspace) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = '' + updated_db = "${wspace}" + } + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK GenomicsDBImport] Available memory not known - defaulting to 3GB. 
Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + GenomicsDBImport \\ + ${input_command} \\ + ${genomicsdb_command} \\ + ${interval_command} \\ + --tmp-dir . \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + + genomicsdb_command = "--genomicsdb-workspace-path ${prefix}" + interval_command = interval_file ? "--intervals ${interval_file}" : "--intervals ${interval_value}" + updated_db = "" + + // settings changed for running get intervals list mode if run_intlist is true + if (run_intlist) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = "--output-interval-list-to-file ${prefix}.interval_list" + } + + // settings changed for running update gendb mode. input_command same as default, update_db forces module to emit the updated gendb + if (run_updatewspace) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = '' + updated_db = "${wspace}" + } + + def stub_genomicsdb = genomicsdb_command == "--genomicsdb-workspace-path ${prefix}" ? "touch ${prefix}" : "" + def stub_interval = interval_command == "--output-interval-list-to-file ${prefix}.interval_list" ? "touch ${prefix}.interval_list" : "" + def stub_update = updated_db != "" ? 
"touch ${wspace}" : "" + + """ + ${stub_genomicsdb} + ${stub_interval} + ${stub_update} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/genomicsdbimport/meta.yml b/modules/nf-core/gatk4/genomicsdbimport/meta.yml new file mode 100644 index 0000000..0da0cc4 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/meta.yml @@ -0,0 +1,105 @@ +name: gatk4_genomicsdbimport +description: merge GVCFs from multiple samples. For use in joint genotyping or somatic + panel of normal creation. +keywords: + - gatk4 + - genomicsdb + - genomicsdbimport + - jointgenotyping + - panelofnormalscreation +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: list + description: either a list of vcf files to be used to create or update a genomicsdb, + or a file that contains a map to vcf files to be used. 
+ pattern: "*.vcf.gz" + - tbi: + type: list + description: list of tbi files that match with the input vcf files + pattern: "*.vcf.gz_tbi" + - interval_file: + type: file + description: file containing the intervals to be used when creating the genomicsdb + pattern: "*.interval_list" + ontologies: [] + - interval_value: + type: string + description: if an intervals file has not been specified, the value entered + here will be used as an interval via the "-L" argument + pattern: "example: chr1:1000-10000" + - wspace: + type: file + description: path to an existing genomicsdb to be used in update db mode or + get intervals mode. This WILL NOT specify name of a new genomicsdb in create + db mode. + pattern: "/path/to/existing/gendb" + ontologies: [] + - run_intlist: + type: boolean + description: Specify whether to run get interval list mode, this option cannot + be specified at the same time as run_updatewspace. + pattern: "true/false" + - run_updatewspace: + type: boolean + description: Specify whether to run update genomicsdb mode, this option takes + priority over run_intlist. + pattern: "true/false" + - input_map: + type: boolean + description: Specify whether the vcf input is providing a list of vcf file(s) + or a single file containing a map of paths to vcf files to be used to create + or update a genomicsdb. + pattern: "*.sample_map" +output: + genomicsdb: + - - meta: + type: map + description: A Groovy map containing sample information + - ${prefix}: + type: file + description: genomicsdb + ontologies: [] + updatedb: + - - meta: + type: map + description: A Groovy map containing sample information + - ${updated_db}: + type: file + description: updated genomicsdb + ontologies: [] + intervallist: + - - meta: + type: map + description: A Groovy map containing sample information + - "*.interval_list": + type: file + description: File containing the intervals used to generate the genomicsdb, + only created by get intervals mode. 
+ pattern: "*.interval_list" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/germlinecnvcaller/environment.yml b/modules/nf-core/gatk4/germlinecnvcaller/environment.yml new file mode 100644 index 0000000..67e0eb8 --- /dev/null +++ b/modules/nf-core/gatk4/germlinecnvcaller/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/germlinecnvcaller/main.nf b/modules/nf-core/gatk4/germlinecnvcaller/main.nf new file mode 100644 index 0000000..6da848d --- /dev/null +++ b/modules/nf-core/gatk4/germlinecnvcaller/main.nf @@ -0,0 +1,72 @@ +process GATK4_GERMLINECNVCALLER { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" + + input: + tuple val(meta), path(tsv), path(intervals), path(ploidy), path(model) + + output: + tuple val(meta), path("*-cnv-model/*-calls"), emit: cohortcalls, optional: true + tuple val(meta), path("*-cnv-model/*-model"), emit: cohortmodel, optional: true + tuple val(meta), path("*-cnv-calls/*-calls"), emit: casecalls, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def intervals_command = intervals ? "--intervals ${intervals}" : "" + def ploidy_command = ploidy ? "--contig-ploidy-calls ${ploidy}" : "" + def model_command = model ? "--model ${model}" : "" + def input_list = tsv.collect { "--input ${it}" }.join(' ') + def output_command = model ? "--output ${prefix}-cnv-calls" : "--output ${prefix}-cnv-model" + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK GermlineCNVCaller] Available memory not known - defaulting to 3GB. 
Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + export THEANO_FLAGS="base_compiledir=\$PWD" + export PYTENSOR_FLAGS="base_compiledir=\$PWD" + export OMP_NUM_THREADS=${task.cpus} + export MKL_NUM_THREADS=${task.cpus} + + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + GermlineCNVCaller \\ + ${input_list} \\ + ${ploidy_command} \\ + ${output_command} \\ + --output-prefix ${prefix} \\ + ${args} \\ + ${intervals_command} \\ + ${model_command} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p ${prefix}-cnv-calls/${prefix}-calls + mkdir -p ${prefix}-cnv-model/${prefix}-model + mkdir -p ${prefix}-cnv-model/${prefix}-calls + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/germlinecnvcaller/meta.yml b/modules/nf-core/gatk4/germlinecnvcaller/meta.yml new file mode 100644 index 0000000..4d8bb1d --- /dev/null +++ b/modules/nf-core/gatk4/germlinecnvcaller/meta.yml @@ -0,0 +1,93 @@ +name: "gatk4_germlinecnvcaller" +description: Calls copy-number variants in germline samples given their counts and + the output of DetermineGermlineContigPloidy. +keywords: + - gatk + - germline contig ploidy + - germlinecnvcaller +tools: + - "gatk4": + description: Developed in the Data Sciences Platform at the Broad Institute, the + toolkit offers a wide variety of tools with a primary focus on variant discovery + and genotyping. Its powerful processing engine and high-performance computing + features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tsv: + type: file + description: One or more count TSV files created with gatk/collectreadcounts + pattern: "*.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + - intervals: + type: file + description: Optional - A bed file containing the intervals to include in the + process + pattern: "*.bed" + ontologies: [] + - ploidy: + type: directory + description: Directory containing ploidy calls produced by determinegermlinecontigploidy + case or cohort mode + pattern: "*-calls" + - model: + type: directory + description: Optional - directory containing the model produced by germlinecnvcaller + cohort mode + pattern: "*-cnv-model/*-model" +output: + cohortcalls: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-cnv-model/*-calls": + type: directory + description: Tar gzipped directory containing calls produced by germlinecnvcaller + cohort mode + pattern: "*-cnv-model/*-calls" + cohortmodel: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-cnv-model/*-model": + type: directory + description: Optional - Tar gzipped directory containing the model produced + by germlinecnvcaller cohort mode + pattern: "*-cnv-model/*-model" + casecalls: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*-cnv-calls/*-calls": + type: directory + description: Tar gzipped directory containing calls produced by germlinecnvcaller + case mode + pattern: "*-cnv-calls/*-calls" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@ryanjameskennedy" + - "@ViktorHy" +maintainers: + - "@ryanjameskennedy" + - "@ViktorHy" diff --git a/modules/nf-core/gatk4/indexfeaturefile/environment.yml b/modules/nf-core/gatk4/indexfeaturefile/environment.yml new file mode 100644 index 0000000..67e0eb8 --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/indexfeaturefile/main.nf b/modules/nf-core/gatk4/indexfeaturefile/main.nf new file mode 100644 index 0000000..aa8fe72 --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/main.nf @@ -0,0 +1,42 @@ +process GATK4_INDEXFEATUREFILE { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" + + input: + tuple val(meta), path(feature_file) + + output: + tuple val(meta), path("*.{tbi,idx}"), emit: index + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK IndexFeatureFile] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + IndexFeatureFile \\ + --input ${feature_file} \\ + --tmp-dir . \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/indexfeaturefile/meta.yml b/modules/nf-core/gatk4/indexfeaturefile/meta.yml new file mode 100644 index 0000000..ff747b5 --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/meta.yml @@ -0,0 +1,50 @@ +name: gatk4_indexfeaturefile +description: Creates an index for a feature file, e.g. VCF or BED file. +keywords: + - feature + - gatk4 + - index + - indexfeaturefile +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4) + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + tool_dev_url: https://github.com/broadinstitute/gatk + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["BSD-3-clause"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - feature_file: + type: file + description: VCF/BED file + pattern: "*.{vcf,vcf.gz,bed,bed.gz}" + ontologies: [] +output: + index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{tbi,idx}": + type: file + description: Index for VCF/BED file + pattern: "*.{tbi,idx}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@santiagorevale" +maintainers: + - "@santiagorevale" diff --git a/modules/nf-core/gatk4/intervallisttools/environment.yml b/modules/nf-core/gatk4/intervallisttools/environment.yml new file mode 100644 index 0000000..67e0eb8 --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttools/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/intervallisttools/main.nf b/modules/nf-core/gatk4/intervallisttools/main.nf new file mode 100644 index 0000000..911eb45 --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttools/main.nf @@ -0,0 +1,75 @@ +process GATK4_INTERVALLISTTOOLS { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" + + input: + tuple val(meta), path(intervals) + + output: + tuple val(meta), path("*_split/*/*.interval_list"), emit: interval_list + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK IntervalListTools] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + + mkdir ${prefix}_split + + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + IntervalListTools \\ + --INPUT ${intervals} \\ + --OUTPUT ${prefix}_split \\ + --TMP_DIR . \\ + ${args} + + python3 < versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p ${prefix}_split/temp_0001_of_6 + mkdir -p ${prefix}_split/temp_0002_of_6 + mkdir -p ${prefix}_split/temp_0003_of_6 + mkdir -p ${prefix}_split/temp_0004_of_6 + touch ${prefix}_split/temp_0001_of_6/1scattered.interval_list + touch ${prefix}_split/temp_0002_of_6/2scattered.interval_list + touch ${prefix}_split/temp_0003_of_6/3scattered.interval_list + touch ${prefix}_split/temp_0004_of_6/4scattered.interval_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/intervallisttools/meta.yml b/modules/nf-core/gatk4/intervallisttools/meta.yml new file mode 100644 index 0000000..8bad365 --- /dev/null +++ 
b/modules/nf-core/gatk4/intervallisttools/meta.yml @@ -0,0 +1,52 @@ +name: gatk4_intervallisttools +description: Splits the interval list file into unique, equally-sized interval files + and places them under a directory +keywords: + - bed + - gatk4 + - interval_list + - sort +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals: + type: file + description: Interval file + ontologies: [] +output: + interval_list: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*_split/*/*.interval_list": + type: file + description: Interval list files + pattern: "*.interval_list" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@praveenraj2018" +maintainers: + - "@praveenraj2018" diff --git a/modules/nf-core/gatk4/mutect2/environment.yml b/modules/nf-core/gatk4/mutect2/environment.yml new file mode 100644 index 0000000..67e0eb8 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/mutect2/main.nf b/modules/nf-core/gatk4/mutect2/main.nf new file mode 100644 index 0000000..f8d3b38 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/main.nf @@ -0,0 +1,80 @@ +process GATK4_MUTECT2 { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" + + input: + tuple val(meta), path(input), path(input_index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai), path(gzi) + tuple val(meta4), path(dict) + path alleles + path alleles_tbi + path germline_resource + path germline_resource_tbi + path panel_of_normals + path panel_of_normals_tbi + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.tbi"), emit: tbi + tuple val(meta), path("*.stats"), emit: stats + tuple val(meta), path("*.f1r2.tar.gz"), emit: f1r2, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def inputs = input.collect { vcf_ -> "--input ${vcf_}" }.join(" ") + def interval_command = intervals ? "--intervals ${intervals}" : "" + def pon_command = panel_of_normals ? "--panel-of-normals ${panel_of_normals}" : "" + def gr_command = germline_resource ? "--germline-resource ${germline_resource}" : "" + def a_command = alleles ? "--alleles ${alleles}": "" + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + Mutect2 \\ + ${inputs} \\ + --output ${prefix}.vcf.gz \\ + --reference ${fasta} \\ + ${pon_command} \\ + ${gr_command} \\ + ${a_command} \\ + ${interval_command} \\ + --tmp-dir . 
\\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + touch ${prefix}.vcf.gz.stats + echo "" | gzip > ${prefix}.f1r2.tar.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/mutect2/meta.yml b/modules/nf-core/gatk4/mutect2/meta.yml new file mode 100644 index 0000000..64029f2 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/meta.yml @@ -0,0 +1,161 @@ +name: gatk4_mutect2 +description: Call somatic SNVs and indels via local assembly of haplotypes. +keywords: + - gatk4 + - haplotype + - indels + - mutect2 + - snvs + - somatic +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - input: + type: list + description: list of BAM files, also able to take CRAM as an input + pattern: "*.{bam/cram}" + - input_index: + type: list + description: list of BAM file indexes, also able to take CRAM indexes as an + input + pattern: "*.{bam.bai/cram.crai}" + - intervals: + type: file + description: Specify region the tools is run on. 
+ pattern: ".{bed,interval_list}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.{fasta,fasta.gz}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.{fasta.fai,fasta.fai.gz}" + ontologies: [] + - gzi: + type: file + description: Index of bgzipped reference fasta file + pattern: "*.fasta.gz.gzi" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + ontologies: [] + - alleles: + type: file + description: vcf file to be used to force-call alleles. + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - alleles_tbi: + type: file + description: Index file for alleles to be force-called. + pattern: "*.vcf.gz.tbi" + ontologies: [] + - germline_resource: + type: file + description: Population vcf of germline sequencing, containing allele fractions. + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - germline_resource_tbi: + type: file + description: Index file for the germline resource. + pattern: "*.vcf.gz.tbi" + ontologies: [] + - panel_of_normals: + type: file + description: vcf file to be used as a panel of normals. + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - panel_of_normals_tbi: + type: file + description: Index for the panel of normals. 
+ pattern: "*.vcf.gz.tbi" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: A Groovy map containing sample information + - "*.vcf.gz": + type: file + description: compressed vcf file + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + tbi: + - - meta: + type: map + description: A Groovy map containing sample information + - "*.tbi": + type: file + description: Index of vcf file + pattern: "*vcf.gz.tbi" + ontologies: [] + stats: + - - meta: + type: map + description: A Groovy map containing sample information + - "*.stats": + type: file + description: Stats file that pairs with output vcf file + pattern: "*vcf.gz.stats" + ontologies: [] + f1r2: + - - meta: + type: map + description: A Groovy map containing sample information + - "*.f1r2.tar.gz": + type: file + description: file containing information to be passed to LearnReadOrientationModel + (only outputted when tumor_normal_pair mode is run) + pattern: "*.f1r2.tar.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@GCJMackenzie" + - "@ramprasadn" +maintainers: + - "@GCJMackenzie" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/preprocessintervals/environment.yml b/modules/nf-core/gatk4/preprocessintervals/environment.yml new file mode 100644 index 0000000..67e0eb8 --- /dev/null +++ b/modules/nf-core/gatk4/preprocessintervals/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel + - bioconda::gcnvkernel=0.9 diff --git 
a/modules/nf-core/gatk4/preprocessintervals/main.nf b/modules/nf-core/gatk4/preprocessintervals/main.nf new file mode 100644 index 0000000..3d63502 --- /dev/null +++ b/modules/nf-core/gatk4/preprocessintervals/main.nf @@ -0,0 +1,63 @@ +process GATK4_PREPROCESSINTERVALS { + tag "${fasta}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(fai) + tuple val(meta3), path(dict) + tuple val(meta4), path(intervals) + tuple val(meta5), path(exclude_intervals) + + output: + tuple val(meta), path("*.interval_list"), emit: interval_list + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def include_command = intervals ? "--intervals ${intervals}" : "" + def exclude_command = exclude_intervals ? "--exclude-intervals ${exclude_intervals}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info('[GATK PreprocessIntervals] Available memory not known - defaulting to 3GB. 
Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + PreprocessIntervals \\ + ${include_command} \\ + ${exclude_command} \\ + --reference ${fasta} \\ + --output ${prefix}.interval_list \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.interval_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/preprocessintervals/meta.yml b/modules/nf-core/gatk4/preprocessintervals/meta.yml new file mode 100644 index 0000000..af3e5bc --- /dev/null +++ b/modules/nf-core/gatk4/preprocessintervals/meta.yml @@ -0,0 +1,98 @@ +name: "gatk4_preprocessintervals" +description: Prepares bins for coverage collection. +keywords: + - bed + - gatk4 + - interval + - preprocessintervals +tools: + - "gatk4": + description: Developed in the Data Sciences Platform at the Broad Institute, the + toolkit offers a wide variety of tools with a primary focus on variant discovery + and genotyping. Its powerful processing engine and high-performance computing + features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - intervals: + type: file + description: Interval file (bed or interval_list) with the genomic regions to + be included in the analysis (optional) + pattern: "*.{bed,interval_list}" + ontologies: [] + - - meta5: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - exclude_intervals: + type: file + description: Interval file (bed or interval_list) with the genomic regions to + be excluded from the analysis (optional) + pattern: "*.{bed,interval_list}" + ontologies: [] +output: + interval_list: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - "*.interval_list": + type: file + description: Processed interval list file + pattern: "*.{bed,interval_list}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@ryanjameskennedy" + - "@ViktorHy" + - "@ramprasadn" +maintainers: + - "@ryanjameskennedy" + - "@ViktorHy" + - "@ramprasadn" diff --git a/modules/nf-core/gawk/environment.yml b/modules/nf-core/gawk/environment.yml new file mode 100644 index 0000000..f52109e --- /dev/null +++ b/modules/nf-core/gawk/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::gawk=5.3.0 diff --git a/modules/nf-core/gawk/main.nf b/modules/nf-core/gawk/main.nf new file mode 100644 index 0000000..615b2ce --- /dev/null +++ b/modules/nf-core/gawk/main.nf @@ -0,0 +1,70 @@ +process GAWK { // Runs awk over the staged input files, with the program taken from a program file or from ext.args2 + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gawk:5.3.0' : + 'biocontainers/gawk:5.3.0' }" + + input: + tuple val(meta), path(input, arity: '0..*') + path(program_file) + val(disable_redirect_output) + + output: + tuple val(meta), path("*.${suffix}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' // args is used for the main arguments of the tool + def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.collect{ it.getExtension()}.get(0)}" // use the first extension of the input files + + program = program_file ? "-f ${program_file}" : "${args2}" // a program file takes precedence over the program text in ext.args2 + lst_gz = input.findResults{ it.getExtension().endsWith("gz") ? it.toString() : null } // names of the inputs whose extension ends with "gz" + unzip = lst_gz ? "gunzip -q -f ${lst_gz.join(" ")}" : "" + input_cmd = input.collect { it.toString() - ~/\.gz$/ }.join(" ") // input names with a trailing .gz stripped + output_cmd = suffix.endsWith("gz") ? "| gzip > ${prefix}.${suffix}" : "> ${prefix}.${suffix}" + output = disable_redirect_output ? "" : output_cmd + cleanup = lst_gz ? "rm ${lst_gz.collect{ it - ~/\.gz$/ }.join(" ")}" : "" // deletes the decompressed copies once awk has run + + input.collect{ + assert it.name != "${prefix}.${suffix}" : "Input and output names are the same, set prefix in module configuration to disambiguate!" + } + + """ + ${unzip} + + awk \\ + ${args} \\ + ${program} \\ + ${input_cmd} \\ + ${output} + + ${cleanup} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.collect{ it.getExtension()}.get(0)}" // input is a list (arity '0..*'), so take the first file's extension as the script block does + def create_cmd = suffix.endsWith("gz") ? 
+ "echo '' | gzip >" : "touch" + + """ + ${create_cmd} ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gawk/meta.yml b/modules/nf-core/gawk/meta.yml new file mode 100644 index 0000000..732e18a --- /dev/null +++ b/modules/nf-core/gawk/meta.yml @@ -0,0 +1,68 @@ +name: "gawk" +description: | + If you are like many computer users, you would frequently like to make changes in various text files + wherever certain patterns appear, or extract data from parts of certain lines while discarding the rest. + The job is easy with awk, especially the GNU implementation gawk. +keywords: + - gawk + - awk + - txt + - text + - file parsing +tools: + - "gawk": + description: "GNU awk" + homepage: "https://www.gnu.org/software/gawk/" + documentation: "https://www.gnu.org/software/gawk/manual/" + tool_dev_url: "https://www.gnu.org/prep/ftp.html" + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: The input file - Specify the logic that needs to be executed on + this file on the `ext.args2` or in the program file. If the files have a `.gz` + extension, they will be unzipped using `gunzip`. + pattern: "*" + ontologies: [] + - program_file: + type: file + description: Optional file containing logic for awk to execute. If you don't wish + to use a file, you can use `ext.args2` to specify the logic. + pattern: "*" + ontologies: [] + - disable_redirect_output: + type: boolean + description: Disable the redirection of awk output to a given file. This is useful + if you want to use awk's built-in redirect to write files instead of the shell's + redirect. +output: + output: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.${suffix}": + type: file + description: The output file - if using shell redirection, specify the name + of this file using `ext.prefix` and the extension using `ext.suffix`. Otherwise, + ensure the awk program produces files with the extension in `ext.suffix`. + pattern: "*" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/mosdepth/environment.yml b/modules/nf-core/mosdepth/environment.yml new file mode 100644 index 0000000..1c7f3ee --- /dev/null +++ b/modules/nf-core/mosdepth/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/mosdepth + - htslib=1.22.1 + - mosdepth=0.3.11=h0ec343a_1 diff --git a/modules/nf-core/mosdepth/main.nf b/modules/nf-core/mosdepth/main.nf new file mode 100644 index 0000000..63739bf --- /dev/null +++ b/modules/nf-core/mosdepth/main.nf @@ -0,0 +1,77 @@ +process MOSDEPTH { // Runs mosdepth on one alignment file, optionally with a BED file (--by) and a reference FASTA (--fasta) + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/00/00d32b53160c26794959da7303ee6e2107afd4d292060c9f287b0af1fddbd847/data' : + 'community.wave.seqera.io/library/mosdepth_htslib:0f58993cb6d93294'}" + + input: + tuple val(meta), path(bam), path(bai), path(bed) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path('*.global.dist.txt') , emit: global_txt + tuple val(meta), path('*.summary.txt') , emit: summary_txt + tuple val(meta), path('*.region.dist.txt') , optional:true, emit: regions_txt + tuple val(meta), path('*.per-base.d4') , optional:true, emit: per_base_d4 + tuple val(meta), path('*.per-base.bed.gz') , optional:true, emit: per_base_bed + tuple val(meta), path('*.per-base.bed.gz.csi') , optional:true, emit: per_base_csi + tuple val(meta), path('*.regions.bed.gz') , optional:true, emit: regions_bed + tuple val(meta), path('*.regions.bed.gz.csi') , optional:true, emit: regions_csi + tuple val(meta), path('*.quantized.bed.gz') , optional:true, emit: quantized_bed + tuple val(meta), path('*.quantized.bed.gz.csi') , optional:true, emit: quantized_csi + tuple val(meta), path('*.thresholds.bed.gz') , optional:true, emit: thresholds_bed + tuple val(meta), path('*.thresholds.bed.gz.csi'), optional:true, emit: thresholds_csi + tuple val("${task.process}"), val('mosdepth'), eval("mosdepth --version | sed 's/mosdepth //g'"), topic: versions, emit: versions_mosdepth + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--fasta ${fasta}" : "" // empty when no fasta is staged + def interval = bed ? "--by ${bed}" : "" // empty when no BED file is staged + if (bed && (args.contains("--by") || args.contains("-b "))) { // a BED input already supplies --by, so ext.args must not repeat it + error "'--by' can only be specified once when running mosdepth! 
Either remove input BED file definition or remove '--by' from 'ext.args' definition" + } + if (args.contains("--thresholds") && !(bed || args.contains("--by") || args.contains("-b "))) { // --thresholds needs regions from either a BED input or --by/-b in ext.args + error "'--thresholds' can only be specified in conjunction with '--by' or an input bed file" + } + + """ + mosdepth \\ + --threads $task.cpus \\ + $interval \\ + $reference \\ + $args \\ + $prefix \\ + $bam + """ + + stub: // creates a placeholder file for every declared file output + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (bed && (args.contains("--by") || args.contains("-b "))) { // same two argument checks as the script block + error "'--by' can only be specified once when running mosdepth! Either remove input BED file definition or remove '--by' from 'ext.args' definition" + } + if (args.contains("--thresholds") && !(bed || args.contains("--by") || args.contains("-b "))) { + error "'--thresholds' can only be specified in conjunction with '--by' or an input bed file" + } + """ + touch ${prefix}.global.dist.txt + touch ${prefix}.region.dist.txt + touch ${prefix}.summary.txt + touch ${prefix}.per-base.d4 + echo "" | gzip > ${prefix}.per-base.bed.gz + touch ${prefix}.per-base.bed.gz.csi + echo "" | gzip > ${prefix}.regions.bed.gz + touch ${prefix}.regions.bed.gz.csi + echo "" | gzip > ${prefix}.quantized.bed.gz + touch ${prefix}.quantized.bed.gz.csi + echo "" | gzip > ${prefix}.thresholds.bed.gz + touch ${prefix}.thresholds.bed.gz.csi + """ +} diff --git a/modules/nf-core/mosdepth/meta.yml b/modules/nf-core/mosdepth/meta.yml new file mode 100644 index 0000000..04c8bfe --- /dev/null +++ b/modules/nf-core/mosdepth/meta.yml @@ -0,0 +1,211 @@ +name: mosdepth +description: Calculates genome-wide sequencing coverage. +keywords: + - mosdepth + - bam + - cram + - coverage +tools: + - mosdepth: + description: | + Fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. 
+ documentation: https://github.com/brentp/mosdepth + doi: 10.1093/bioinformatics/btx699 + licence: ["MIT"] + identifier: biotools:mosdepth +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Input BAM/CRAM file + pattern: "*.{bam,cram}" + ontologies: [] + - bai: + type: file + description: Index for BAM/CRAM file + pattern: "*.{bai,crai}" + ontologies: [] + - bed: + type: file + description: BED file with intersected intervals + pattern: "*.{bed}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta}" + ontologies: [] +output: + global_txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.global.dist.txt": + type: file + description: Text file with global cumulative coverage distribution + pattern: "*.{global.dist.txt}" + ontologies: [] + summary_txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.summary.txt": + type: file + description: Text file with summary mean depths per chromosome and regions + pattern: "*.{summary.txt}" + ontologies: [] + regions_txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.region.dist.txt": + type: file + description: Text file with region cumulative coverage distribution + pattern: "*.{region.dist.txt}" + ontologies: [] + per_base_d4: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.per-base.d4": + type: file + description: D4 file with per-base coverage + pattern: "*.{per-base.d4}" + ontologies: [] + per_base_bed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.per-base.bed.gz": + type: file + description: BED file with per-base coverage + pattern: "*.{per-base.bed.gz}" + ontologies: [] + per_base_csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.per-base.bed.gz.csi": + type: file + description: Index file for BED file with per-base coverage + pattern: "*.{per-base.bed.gz.csi}" + ontologies: [] + regions_bed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.regions.bed.gz": + type: file + description: BED file with per-region coverage + pattern: "*.{regions.bed.gz}" + ontologies: [] + regions_csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.regions.bed.gz.csi": + type: file + description: Index file for BED file with per-region coverage + pattern: "*.{regions.bed.gz.csi}" + ontologies: [] + quantized_bed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.quantized.bed.gz": + type: file + description: BED file with binned coverage + pattern: "*.{quantized.bed.gz}" + ontologies: [] + quantized_csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.quantized.bed.gz.csi": + type: file + description: Index file for BED file with binned coverage + pattern: "*.{quantized.bed.gz.csi}" + ontologies: [] + thresholds_bed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.thresholds.bed.gz": + type: file + description: BED file with the number of bases in each region that are covered + at or above each threshold + pattern: "*.{thresholds.bed.gz}" + ontologies: [] + thresholds_csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.thresholds.bed.gz.csi": + type: file + description: Index file for BED file with threshold coverage + pattern: "*.{thresholds.bed.gz.csi}" + ontologies: [] + versions_mosdepth: + - - ${task.process}: + type: string + description: The process the versions were collected from + - mosdepth: + type: string + description: The tool name + - "mosdepth --version | sed 's/mosdepth //g'": + type: string + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - mosdepth: + type: string + description: The tool name + - "mosdepth --version | sed 's/mosdepth //g'": + type: string + description: The command used to generate the version of the tool +authors: + - "@joseespinosa" + - "@drpatelh" + - "@ramprasadn" + - "@matthdsm" +maintainers: + - "@joseespinosa" + - "@ramprasadn" + - "@matthdsm" diff --git a/modules/nf-core/mosdepth/tests/main.nf.test b/modules/nf-core/mosdepth/tests/main.nf.test new file mode 100644 index 0000000..b05dde5 --- /dev/null +++ b/modules/nf-core/mosdepth/tests/main.nf.test @@ -0,0 +1,268 @@ +nextflow_process { // nf-test specification for the MOSDEPTH process; most cases compare process.out against the stored snapshot + + name "Test Process MOSDEPTH" + script "../main.nf" + process "MOSDEPTH" + + tag "modules" + tag "modules_nfcore" + tag "mosdepth" + config "./nextflow.config" // presumably forwards params.module_args to the process ext.args; confirm in that file + + test("homo_sapiens - bam, bai, []") { + + when { + params { + module_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: 
true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - bam, bai, bed") { + + when { + params { + module_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - cram, crai, []") { + + when { + params { + module_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - cram, crai, bed") { + + when { + params { + module_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - bam, bai, [] - window") { + + when { + params { + module_args = "--by 100" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - bam, bai, [] - quantized") { + + when { + params { + module_args = "--quantize 0:1:4:100:200" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - bam, bai, bed - thresholds") { + + when { + params { + module_args = "--thresholds 1,10,20,30" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - bam, bai, bed - fail") { + + when { + params { + module_args = "--by 100" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.failed // a BED input together with "--by" in module_args is the combination this case expects to be rejected + } + + } + + test("homo_sapiens - bam, bai, [] - stub") { // NOTE(review): a BED file is passed below although the name says []; renaming would also require updating the snapshot key + + options "-stub" + when { + params { + module_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + +} diff --git a/modules/nf-core/mosdepth/tests/main.nf.test.snap b/modules/nf-core/mosdepth/tests/main.nf.test.snap new file mode 100644 index 0000000..c27fcc7 --- /dev/null +++ b/modules/nf-core/mosdepth/tests/main.nf.test.snap @@ -0,0 +1,1450 @@ +{ + "homo_sapiens - bam, bai, [] - stub": { + 
"content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.global.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.region.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.d4:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.global.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + 
}, + "test.per-base.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "per_base_d4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.d4:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "quantized_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "quantized_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.region.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "thresholds_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "thresholds_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:06:13.219131" + }, + "homo_sapiens - cram, crai, bed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + [ + { + 
"id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + 
"timestamp": "2025-09-23T13:22:14.011309" + }, + "homo_sapiens - bam, bai, [] - quantized": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,f037c215449d361112efc10108fcc17c" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,c0a3176a59010639455a4aefb3f247ef" + ] + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,f037c215449d361112efc10108fcc17c" + ] + ], + "quantized_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,c0a3176a59010639455a4aefb3f247ef" + ] + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:22:22.818082" + }, + "homo_sapiens - bam, bai, bed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": 
[ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:22:04.449943" + }, + "homo_sapiens - bam, bai, [] - window": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,0b6ea9f0da1228252d9aef2d3b6f7f76" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,34f48d16fcdd61e44d812e29e02c77b8" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,17a2cbe22a948d7c004b90a1f28347a1" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": 
"test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,34f48d16fcdd61e44d812e29e02c77b8" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,17a2cbe22a948d7c004b90a1f28347a1" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,0b6ea9f0da1228252d9aef2d3b6f7f76" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:22:18.435089" + }, + "homo_sapiens - bam, bai, []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:21:59.785829" + }, + "homo_sapiens - cram, crai, []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:22:09.294766" + }, + "homo_sapiens - bam, bai, bed - thresholds": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,fe70ae728cd10726c42a2bcd44adfc9d" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,2c52ab89e7496af475de3cb2ca04c7b3" + ] + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,fe70ae728cd10726c42a2bcd44adfc9d" + ] + ], + "thresholds_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,2c52ab89e7496af475de3cb2ca04c7b3" + ] + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:22:27.300204" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/mosdepth/tests/nextflow.config b/modules/nf-core/mosdepth/tests/nextflow.config new file mode 100644 index 0000000..b21c05b --- /dev/null +++ b/modules/nf-core/mosdepth/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: "MOSDEPTH" { + ext.args = params.module_args + } +} diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index a27122c..d02016a 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,5 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::multiqc=1.27 + - bioconda::multiqc=1.32 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 58d9313..c1158fb 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.27--pyhdfd78af_0' : - 'biocontainers/multiqc:1.27--pyhdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/8c/8c6c120d559d7ee04c7442b61ad7cf5a9e8970be5feefb37d68eeaa60c1034eb/data' : + 'community.wave.seqera.io/library/multiqc:1.32--d58f60e4deb769bf' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index b16c187..ce30eb7 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -15,57 +15,71 @@ tools: licence: ["GPL-3.0-or-later"] identifier: biotools:multiqc input: - - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections - in multiqc_config. - pattern: "*.{yml,yaml}" - - - multiqc_logo: + - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + ontologies: [] + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + ontologies: [] + - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. 
Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. + pattern: "*.{tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV +output: + report: + - "*multiqc_report.html": type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" - - - replace_names: + description: MultiQC report file + pattern: "multiqc_report.html" + ontologies: [] + data: + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" + plots: + - "*_plots": type: file - description: | - Optional two-column sample renaming file. First column a set of - patterns, second column a set of corresponding replacements. Passed via - MultiQC's `--replace-names` option. - pattern: "*.{tsv}" - - - sample_names: + description: Plots created by MultiQC + pattern: "*_data" + ontologies: [] + versions: + - versions.yml: type: file - description: | - Optional TSV file with headers, passed to the MultiQC --sample_names - argument. 
- pattern: "*.{tsv}" -output: - - report: - - "*multiqc_report.html": - type: file - description: MultiQC report file - pattern: "multiqc_report.html" - - data: - - "*_data": - type: directory - description: MultiQC data dir - pattern: "multiqc_data" - - plots: - - "*_plots": - type: file - description: Plots created by MultiQC - pattern: "*_data" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@abhi18av" - "@bunop" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test deleted file mode 100644 index 33316a7..0000000 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ /dev/null @@ -1,92 +0,0 @@ -nextflow_process { - - name "Test Process MULTIQC" - script "../main.nf" - process "MULTIQC" - - tag "modules" - tag "modules_nfcore" - tag "multiqc" - - config "./nextflow.config" - - test("sarscov2 single-end [fastqc]") { - - when { - process { - """ - input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) - input[1] = [] - input[2] = [] - input[3] = [] - input[4] = [] - input[5] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, - { assert process.out.data[0] ==~ ".*/multiqc_data" }, - { assert snapshot(process.out.versions).match("multiqc_versions_single") } - ) - } - - } - - test("sarscov2 single-end [fastqc] [config]") { - - when { - process { - """ - input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) - input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) - 
input[2] = [] - input[3] = [] - input[4] = [] - input[5] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, - { assert process.out.data[0] ==~ ".*/multiqc_data" }, - { assert snapshot(process.out.versions).match("multiqc_versions_config") } - ) - } - } - - test("sarscov2 single-end [fastqc] - stub") { - - options "-stub" - - when { - process { - """ - input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) - input[1] = [] - input[2] = [] - input[3] = [] - input[4] = [] - input[5] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.report.collect { file(it).getName() } + - process.out.data.collect { file(it).getName() } + - process.out.plots.collect { file(it).getName() } + - process.out.versions ).match("multiqc_stub") } - ) - } - - } -} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap deleted file mode 100644 index 7b7c132..0000000 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ /dev/null @@ -1,41 +0,0 @@ -{ - "multiqc_versions_single": { - "content": [ - [ - "versions.yml:md5,8f3b8c1cec5388cf2708be948c9fa42f" - ] - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.4" - }, - "timestamp": "2025-01-27T09:29:57.631982377" - }, - "multiqc_stub": { - "content": [ - [ - "multiqc_report.html", - "multiqc_data", - "multiqc_plots", - "versions.yml:md5,8f3b8c1cec5388cf2708be948c9fa42f" - ] - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.4" - }, - "timestamp": "2025-01-27T09:30:34.743726958" - }, - "multiqc_versions_config": { - "content": [ - [ - "versions.yml:md5,8f3b8c1cec5388cf2708be948c9fa42f" - ] - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.4" - }, - "timestamp": "2025-01-27T09:30:21.44383553" - } -} \ No newline at end of file diff --git 
a/modules/nf-core/multiqc/tests/nextflow.config b/modules/nf-core/multiqc/tests/nextflow.config deleted file mode 100644 index c537a6a..0000000 --- a/modules/nf-core/multiqc/tests/nextflow.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: 'MULTIQC' { - ext.prefix = null - } -} diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml deleted file mode 100644 index bea6c0d..0000000 --- a/modules/nf-core/multiqc/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -multiqc: - - modules/nf-core/multiqc/** diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml new file mode 100644 index 0000000..89e12a6 --- /dev/null +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf new file mode 100644 index 0000000..ed2d70a --- /dev/null +++ b/modules/nf-core/samtools/faidx/main.nf @@ -0,0 +1,61 @@ +process SAMTOOLS_FAIDX { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(fai) + val get_sizes + + output: + tuple val(meta), path ("*.{fa,fasta}") , emit: fa, optional: true + tuple val(meta), path ("*.sizes") , emit: sizes, optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def get_sizes_command = get_sizes ? "cut -f 1,2 ${fasta}.fai > ${fasta}.sizes" : '' + """ + samtools \\ + faidx \\ + $fasta \\ + $args + + ${get_sizes_command} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? "touch ${match[0][1]}" : '' + def get_sizes_command = get_sizes ? 
"touch ${fasta}.sizes" : '' + """ + ${fastacmd} + touch ${fasta}.fai + if [[ "${fasta.extension}" == "gz" ]]; then + touch ${fasta}.gzi + fi + + ${get_sizes_command} + + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml new file mode 100644 index 0000000..b7a2e0c --- /dev/null +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -0,0 +1,102 @@ +name: samtools_faidx +description: Index FASTA file, and optionally generate a file of chromosome sizes +keywords: + - index + - fasta + - faidx + - chromosome +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + ontologies: [] + - get_sizes: + type: boolean + description: use cut to get the sizes of the index (true) or not (false) + +output: + fa: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.{fa,fasta}": + type: file + description: FASTA file + pattern: "*.{fa}" + ontologies: [] + sizes: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.sizes": + type: file + description: File containing chromosome lengths + pattern: "*.{sizes}" + ontologies: [] + fai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fai": + type: file + description: FASTA index file + pattern: "*.{fai}" + ontologies: [] + gzi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.gzi": + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@drpatelh" + - "@ewels" + - "@phue" +maintainers: + - "@maxulysse" + - "@phue" diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 0000000..89e12a6 --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 0000000..a77ad82 --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 
'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? "csi" : "bai" + """ + touch ${input}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 0000000..1bed6bc --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,77 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. 
+ homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: input file + ontologies: [] +output: + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI index file + pattern: "*.{csi}" + ontologies: [] + crai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml new file mode 100644 index 0000000..89e12a6 --- /dev/null +++ b/modules/nf-core/samtools/view/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git 
a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf new file mode 100644 index 0000000..02d9b0f --- /dev/null +++ b/modules/nf-core/samtools/view/main.nf @@ -0,0 +1,104 @@ +process SAMTOOLS_VIEW { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" + + input: + tuple val(meta), path(input), path(index) + tuple val(meta2), path(fasta) + path qname + val index_format + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true + tuple val(meta), path("${prefix}.${file_type}.bai"), emit: bai, optional: true + tuple val(meta), path("${prefix}.${file_type}.csi"), emit: csi, optional: true + tuple val(meta), path("${prefix}.${file_type}.crai"), emit: crai, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}"), emit: unselected, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}.{csi,crai}"), emit: unselected_index, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + + output_file = index_format ? "${prefix}.${file_type}##idx##${prefix}.${file_type}.${index_format} --write-index" : "${prefix}.${file_type}" + // Can't choose index type of unselected file + readnames = qname ? 
"--qname-file ${qname} --output-unselected ${prefix}.unselected.${file_type}": "" + + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + if (index_format) { + if (!index_format.matches('bai|csi|crai')) { + error "Index format not one of bai, csi, crai." + } else if (file_type == "sam") { + error "Indexing not compatible with SAM output" + } + } + """ + # Note: --threads value represents *additional* CPUs to allocate (total CPUs = 1 + --threads). + samtools \\ + view \\ + --threads ${task.cpus-1} \\ + ${reference} \\ + ${readnames} \\ + $args \\ + -o ${output_file} \\ + $input \\ + $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + default_index_format = + file_type == "bam" ? "csi" : + file_type == "cram" ? "crai" : "" + index = index_format ? "touch ${prefix}.${file_type}.${index_format}" : args.contains("--write-index") ? "touch ${prefix}.${file_type}.${default_index_format}" : "" + unselected = qname ? "touch ${prefix}.unselected.${file_type}" : "" + // Can't choose index type of unselected file + unselected_index = qname && (args.contains("--write-index") || index_format) ? "touch ${prefix}.unselected.${file_type}.${default_index_format}" : "" + + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + if (index_format) { + if (!index_format.matches('bai|csi|crai')) { + error "Index format not one of bai, csi, crai." + } else if (file_type == "sam") { + error "Indexing not compatible with SAM output." 
+ } + } + """ + touch ${prefix}.${file_type} + ${index} + ${unselected} + ${unselected_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml new file mode 100644 index 0000000..3ebbdb8 --- /dev/null +++ b/modules/nf-core/samtools/view/meta.yml @@ -0,0 +1,159 @@ +name: samtools_view +description: filter/convert SAM/BAM/CRAM file +keywords: + - view + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - index: + type: file + description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) + pattern: "*.{.bai,.csi,.crai}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + ontologies: [] + - qname: + type: file + description: Optional file with read names to output only select alignments + pattern: "*.{txt,list}" + ontologies: [] + - index_format: + type: string + description: Index format, used together with ext.args = '--write-index' + pattern: "bai|csi|crai" +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: optional filtered/converted BAM file + pattern: "*.{bam}" + ontologies: [] + cram: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: optional filtered/converted CRAM file + pattern: "*.{cram}" + ontologies: [] + sam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.sam: + type: file + description: optional filtered/converted SAM file + pattern: "*.{sam}" + ontologies: [] + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.bai: + type: file + description: optional BAM file index + pattern: "*.{bai}" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.csi: + type: file + description: optional tabix BAM file index + pattern: "*.{csi}" + ontologies: [] + crai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.${file_type}.crai: + type: file + description: optional CRAM file index + pattern: "*.{crai}" + ontologies: [] + unselected: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}: + type: file + description: optional file with unselected alignments + pattern: "*.unselected.{bam,cram,sam}" + ontologies: [] + unselected_index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}.{csi,crai}: + type: file + description: index for the "unselected" file + pattern: "*.unselected.{csi,crai}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" +maintainers: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/nextflow.config b/nextflow.config index 24e852e..fbbc363 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,21 +9,45 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options - input = null + input = null // References - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes/' - igenomes_ignore = false + genome = null + igenomes_base = 's3://ngi-igenomes/igenomes/' + igenomes_ignore = false + + // Building Panel of Normals and models + tools = null // No default, must be specified + + // Mutect2 options + mutect2_pon_name = null + + // Germlinecnvcaller options + gcnv_analysis_type = 'wgs' + gcnv_bin_length = 1000 + gcnv_model_name = 'germlinecnvcaller' + gcnv_padding = 0 + gcnv_readcount_format = 'HDF5' + gcnv_scatter_content = 5000 + + // Gens 
options + gens_bin_length = 100 + gens_maximum_chunk_size = 167772150 + gens_min_interval_median_percentile = 5.0 + gens_pon_name = 'gens' + gens_readcount_format = 'HDF5' + gens_analysis_type = 'srs' + + // CNVkit options + cnvkit_targets = null // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options outdir = null @@ -32,13 +56,15 @@ params { email_on_fail = null plaintext_email = false monochrome_logs = false - hook_url = null + hook_url = System.getenv('HOOK_URL') help = false help_full = false show_hidden = false version = false pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' - trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')// Config options + trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + + // Config options config_profile_name = null config_profile_description = null @@ -91,7 +117,18 @@ profiles { apptainer.enabled = false docker.runOptions = '-u $(id -u):$(id -g)' } - arm { + arm64 { + process.arch = 'arm64' + // TODO https://github.com/nf-core/modules/issues/6694 + // For now if you're using arm64 you have to use wave for the sake of the maintainers + // wave profile + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' + } + emulate_amd64 { docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { @@ -148,18 +185,6 @@ profiles { wave.freeze = true wave.strategy = 'conda,container' } - gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB - process { - resourceLimits = [ - memory: 8.GB, - cpus : 4, - time : 1.h - ] - } - } gpu { docker.runOptions = '-u $(id 
-u):$(id -g) --gpus all' apptainer.runOptions = '--nv' @@ -177,8 +202,7 @@ includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !pa // Load nf-core/createpanelrefs custom profiles from different institutions. -// TODO nf-core: Optionally, you can add a pipeline-specific nf-core config at https://github.com/nf-core/configs -// includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/pipeline/createpanelrefs.config" : "/dev/null" +includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/pipeline/createpanelrefs.config" : "/dev/null" // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled @@ -197,10 +221,10 @@ includeConfig !params.igenomes_ignore ? 'conf/igenomes.config' : 'conf/igenomes_ // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. env { - PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" - JULIA_DEPOT_PATH = "/usr/local/share/julia" + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" } // Set bash options @@ -235,63 +259,47 @@ dag { manifest { name = 'nf-core/createpanelrefs' - author = """@maxulysse""" // The author field is deprecated from Nextflow version 24.10.0, use contributors instead contributors = [ - // TODO nf-core: Update the field with the details of the contributors to your pipeline. 
New with Nextflow version 24.10.0 [ - name: '@maxulysse', - affiliation: '', - email: '', - github: '', - contribution: [], // List of contribution types ('author', 'maintainer' or 'contributor') - orcid: '' + name: 'Maxime U Garcia', + affiliation: 'Seqera', + email: 'maxime.garcia@seqera.io', + github: '@maxulysse', + contribution: ['author'], + orcid: '0000-0003-2827-9261' ], + [ + name: 'Ramprasad Neethiraj', + affiliation: 'School of Engineering sciences in Chemistry, Biotechnology and Health, KTH Royal Institute of Technology, Stockholm, Sweden; Science for Life Laboratory, Department of Microbiology, Tumour and Cell Biology, Karolinska Institutet, Stockholm, Sweden', + email: 'rne@kth.se', + github: '@ramprasadn', + contribution: ['author'], + orcid: '0000-0001-7313-3734' + ] ] homePage = 'https://github.com/nf-core/createpanelrefs' description = """Generate Panel of Normals, models or other similar references from lots of samples""" mainScript = 'main.nf' - defaultBranch = 'master' - nextflowVersion = '!>=24.04.2' - version = '1.0dev' + defaultBranch = 'main' + nextflowVersion = '!>=25.04.0' + version = '1.0.0' doi = '' } // Nextflow plugins plugins { - id 'nf-schema@2.3.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.5.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet } validation { defaultIgnoreParams = ["genomes"] monochromeLogs = params.monochrome_logs - help { - enabled = true - command = "nextflow run nf-core/createpanelrefs -profile --input samplesheet.csv --outdir " - fullParameter = "help_full" - showHiddenParameter = "show_hidden" - beforeText = """ --\033[2m----------------------------------------------------\033[0m- - \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m -\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m -\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m -\033[0;34m | \\| | \\__, \\__/ | \\ |___ 
\033[0;32m\\`-._,-`-,\033[0m - \033[0;32m`._,._,\'\033[0m -\033[0;35m nf-core/createpanelrefs ${manifest.version}\033[0m --\033[2m----------------------------------------------------\033[0m- -""" - afterText = """${manifest.doi ? "\n* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} -* The nf-core framework - https://doi.org/10.1038/s41587-020-0439-x - -* Software dependencies - https://github.com/nf-core/createpanelrefs/blob/master/CITATIONS.md -""" - } - summary { - beforeText = validation.help.beforeText - afterText = validation.help.afterText - } } // Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' +includeConfig 'conf/modules/base.config' +includeConfig 'conf/modules/cnvkit.config' +includeConfig 'conf/modules/germlinecnvcaller_cohort.config' +includeConfig 'conf/modules/gens_pon.config' +includeConfig 'conf/modules/mutect2.config' +includeConfig 'conf/modules/prepare_genome.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index 44e4708..458cfdf 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/nf-core/createpanelrefs/master/nextflow_schema.json", + "$id": "https://raw.githubusercontent.com/nf-core/createpanelrefs/main/nextflow_schema.json", "title": "nf-core/createpanelrefs pipeline parameters", "description": "Generate Panel of Normals, models or other similar references from lots of samples", "type": "object", @@ -20,7 +20,7 @@ "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. 
Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/createpanelrefs/usage#samplesheet-input).", + "help_text": "A design file with information about the samples in your experiment. Use this parameter to specify the location of the input files. It has to be a comma-separated file with a header row. See [usage docs](https://nf-co.re/createpanelrefs/usage#samplesheet-input).\n\nIf no input file is specified, the pipeline will attempt to locate one in the `{outdir}` directory.", "fa_icon": "fas fa-file-csv" }, "outdir": { @@ -43,6 +43,23 @@ } } }, + "main_options": { + "title": "Main options", + "type": "object", + "description": "Most common options used for the pipeline", + "required": ["tools"], + "default": "", + "properties": { + "tools": { + "type": "string", + "fa_icon": "fas fa-toolbox", + "description": "Tools to use for building Panel of Normals or models.", + "help_text": "Multiple tools separated with commas.\n\nTools available: cnvkit,germlinecnvcaller,gens,mutect2", + "pattern": "^((cnvkit|germlinecnvcaller|gens|mutect2)?,?)*(?\n \n \n \"nf-core/createpanelrefs\"\n \n\n\n[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.04.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/createpanelrefs)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/createpanelrefs** is a bioinformatics pipeline that ...\n\n\n\n\n1. 
Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\n\nNow, you can run the pipeline using:\n\n\n\n```bash\nnextflow run nf-core/createpanelrefs \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/createpanelrefs/usage) and the [parameter documentation](https://nf-co.re/createpanelrefs/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/createpanelrefs/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/createpanelrefs/output).\n\n## Credits\n\nnf-core/createpanelrefs was originally written by @maxulysse.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#createpanelrefs` 
channel](https://nfcore.slack.com/channels/createpanelrefs) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "creativeWorkStatus": "Stable", + "datePublished": "2025-11-20T13:26:32+00:00", + "description": "

\n \n \n \"nf-core/createpanelrefs\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/createpanelrefs)\n[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/createpanelrefs)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/createpanelrefs** is a bioinformatics helper pipeline that will help in creating panel of normals and other models.\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n2. Build Panel of Normals for [`CNVKIT`](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004873)\n3. Build ploidy and cnv calling models for [`GATK's germlinecnvcaller workflow`](https://genome.cshlp.org/content/20/9/1297)\n4. Build Panel of Normals for [`GENS`](https://github.com/Clinical-Genomics-Lund/gens)\n5. Build Panel of Normals for [`Mutect2`](https://genome.cshlp.org/content/20/9/1297)\n6. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,bam,bai,cram,crai\nsample1,sample1.bam,sample1.bai,,\nsample2,sample2.bam,,,\nsample3,sample3.bam,sample3.bai,,\nsample4,sample4.bam,,,\n```\n\nEach row in the samplesheet represents an alignment file, and it is important that you provide the files in the right format for the analysis you want to run.\n\n| Tool | Alignment format |\n| ----------------- | ---------------------------- |\n| cnvkit | bam |\n| germlinecnvcaller | bam or cram or a mix of both |\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/createpanelrefs \\\n -profile \\\n --input samplesheet.csv \\\n --tools \\\n --genome GATK.GRCh38 \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/createpanelrefs/usage) and the [parameter documentation](https://nf-co.re/createpanelrefs/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/createpanelrefs/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/createpanelrefs/output).\n\n## Credits\n\nnf-core/createpanelrefs was originally written by @maxulysse.\n@marrip contributed in the idea that started it all.\n@matthdsm and @FriederikeHanssen contributed in the actual design.\n@ramprasadn's interest was the final push that led to the creation.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @jfy133\n- @JoseEspinosa\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#createpanelrefs` channel](https://nfcore.slack.com/channels/createpanelrefs) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven 
Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -99,7 +99,7 @@ }, "mentions": [ { - "@id": "#3adc6bc9-5659-4ed2-aff7-659417321aab" + "@id": "#f7d49604-4335-43dd-a5d4-d461d3d8aba6" } ], "name": "nf-core/createpanelrefs" @@ -121,21 +121,37 @@ }, { "@id": "main.nf", - "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + "@type": [ + "File", + "SoftwareSourceCode", + "ComputationalWorkflow" + ], "dateCreated": "", - "dateModified": "2025-06-03T11:01:15Z", + "dateModified": "2025-11-20T14:26:32Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", - "keywords": ["nf-core", "nextflow"], - "license": ["MIT"], - "name": ["nf-core/createpanelrefs"], + "keywords": [ + "nf-core", + "nextflow" + ], + "license": [ + "MIT" + ], + "name": [ + "nf-core/createpanelrefs" + ], "programmingLanguage": { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" }, "sdPublisher": { "@id": "https://nf-co.re/" }, - "url": ["https://github.com/nf-core/createpanelrefs", "https://nf-co.re/createpanelrefs/dev/"], - "version": ["1.0dev"] + "url": [ + "https://github.com/nf-core/createpanelrefs", + "https://nf-co.re/createpanelrefs/1.0.0/" + ], + "version": [ + "1.0.0" + ] }, { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", @@ -147,14 +163,14 @@ "url": { "@id": "https://www.nextflow.io/" }, - "version": "!>=24.04.2" + "version": "!>=25.04.0" }, { - "@id": "#3adc6bc9-5659-4ed2-aff7-659417321aab", + "@id": "#f7d49604-4335-43dd-a5d4-d461d3d8aba6", "@type": "TestSuite", "instance": [ { - "@id": "#0f1c2baa-1425-40cc-9f1d-c2c88cf08e1b" + "@id": "#02ac16d0-42cf-4dcd-98f4-65b685078d57" } ], "mainEntity": { @@ -163,7 +179,7 @@ "name": "Test suite for nf-core/createpanelrefs" }, { - "@id": "#0f1c2baa-1425-40cc-9f1d-c2c88cf08e1b", + "@id": "#02ac16d0-42cf-4dcd-98f4-65b685078d57", "@type": "TestInstance", "name": 
"GitHub Actions workflow for testing nf-core/createpanelrefs", "resource": "repos/nf-core/createpanelrefs/actions/workflows/nf-test.yml", @@ -292,4 +308,4 @@ "url": "https://nf-co.re/" } ] -} +} \ No newline at end of file diff --git a/subworkflows/local/gens_pon/main.nf b/subworkflows/local/gens_pon/main.nf new file mode 100644 index 0000000..db3c2f1 --- /dev/null +++ b/subworkflows/local/gens_pon/main.nf @@ -0,0 +1,134 @@ +include { GATK4_COLLECTREADCOUNTS } from '../../../modules/nf-core/gatk4/collectreadcounts' +include { GATK4_CREATEREADCOUNTPANELOFNORMALS } from '../../../modules/nf-core/gatk4/createreadcountpanelofnormals' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index' +include { SAMTOOLS_VIEW } from '../../../modules/nf-core/samtools/view' +include { MOSDEPTH } from '../../../modules/nf-core/mosdepth' +include { GAWK as MOSDEPTH_GATK_HEADER } from '../../../modules/nf-core/gawk' +include { GAWK as MOSDEPTH_GATK_FORMAT } from '../../../modules/nf-core/gawk' +include { GAWK as INTERVAL_LIST_TO_BED } from '../../../modules/nf-core/gawk' +include { CAT_CAT } from '../../../modules/nf-core/cat/cat' + +workflow GENS_PON { + take: + ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] + val_analysis_type // string: [mandatory] type of analysis ('lrs' or 'srs') + val_pon_name // string: [optional] name for panel of normals + ch_dict // channel: [optional] [ val(meta), path(dict) ] + ch_fai // channel: [optional] [ val(meta), path(fai) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_interval_list // channel: [mandatory] [ val(meta), path(interval_list) ] + + main: + versions = channel.empty() + ch_readcounts_out = channel.empty() + + // Filter out files that lack indices, and generate them + ch_input + .branch { meta, alignment, index -> + alignment_with_index: index.size() > 0 + return [meta, alignment, index] + alignment_without_index: index.size() == 0 + return [meta, alignment] + } + .set 
{ ch_for_mix } + + SAMTOOLS_INDEX(ch_for_mix.alignment_without_index) + versions = versions.mix(SAMTOOLS_INDEX.out.versions) + + SAMTOOLS_INDEX.out.bai + .mix(SAMTOOLS_INDEX.out.crai) + .set { ch_index } + + // Collect alignment files and their indices + ch_for_mix.alignment_without_index + .join(ch_index) + .mix(ch_for_mix.alignment_with_index) + .set { ch_bam_bai } + + if (val_analysis_type == 'srs') { + ch_bam_bai + .combine(ch_interval_list.map { _meta, interval_list -> interval_list }) + .set { ch_readcounts_in } + + // Collect read counts, and generate models + GATK4_COLLECTREADCOUNTS(ch_readcounts_in, ch_fasta, ch_fai, ch_dict) + versions = versions.mix(GATK4_COLLECTREADCOUNTS.out.versions) + + GATK4_COLLECTREADCOUNTS.out.tsv + .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) + .set { ch_readcounts } + + } else if (val_analysis_type == 'lrs') { + + INTERVAL_LIST_TO_BED( + ch_interval_list, [], [] + ) + versions = versions.mix(INTERVAL_LIST_TO_BED.out.versions) + + ch_bam_bai + .combine(INTERVAL_LIST_TO_BED.out.output) + .map { meta, bam, bai, _bins_meta, bins -> + [meta, bam, bai, bins] + } + .set { ch_mosdepth_in } + + // Prepare the body + MOSDEPTH( + ch_mosdepth_in, + [[],[]] + ) + + // Prepare the header + SAMTOOLS_VIEW( + ch_bam_bai, + [[],[]], + [], + false + ) + versions = versions.mix(SAMTOOLS_VIEW.out.versions) + + MOSDEPTH_GATK_HEADER( + SAMTOOLS_VIEW.out.sam, + [], + false + ) + versions = versions.mix(MOSDEPTH_GATK_HEADER.out.versions) + + + MOSDEPTH_GATK_FORMAT( + MOSDEPTH.out.regions_bed, + [], + false + ) + versions = versions.mix(MOSDEPTH_GATK_FORMAT.out.versions) + + // Prepare GATK inputs + MOSDEPTH_GATK_HEADER.out.output + .join(MOSDEPTH_GATK_FORMAT.out.output) + .map { meta, header, body -> [meta, [header, body]] } + .set { ch_cat_in } + + CAT_CAT(ch_cat_in) + + CAT_CAT.out.file_out + .map { meta, gatk_input -> + return [meta, gatk_input] + } + .set { ch_readcounts } + + } + + ch_readcounts + .collect { _meta, readcounts -> readcounts } + .map { 
readcounts -> [[id: val_pon_name], readcounts] } + .set { ch_create_pon_in } + + GATK4_CREATEREADCOUNTPANELOFNORMALS(ch_create_pon_in) + + versions = versions.mix(GATK4_CREATEREADCOUNTPANELOFNORMALS.out.versions) + + emit: + genspon = GATK4_CREATEREADCOUNTPANELOFNORMALS.out.pon + readcounts = ch_readcounts_out + versions +} diff --git a/subworkflows/local/germlinecnvcaller_cohort/main.nf b/subworkflows/local/germlinecnvcaller_cohort/main.nf new file mode 100644 index 0000000..4c63478 --- /dev/null +++ b/subworkflows/local/germlinecnvcaller_cohort/main.nf @@ -0,0 +1,174 @@ +include { GATK4_ANNOTATEINTERVALS } from '../../../modules/nf-core/gatk4/annotateintervals' +include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_TARGETS } from '../../../modules/nf-core/gatk4/bedtointervallist' +include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_EXCLUDE } from '../../../modules/nf-core/gatk4/bedtointervallist' +include { GATK4_COLLECTREADCOUNTS } from '../../../modules/nf-core/gatk4/collectreadcounts' +include { GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../../modules/nf-core/gatk4/determinegermlinecontigploidy' +include { GATK4_FILTERINTERVALS } from '../../../modules/nf-core/gatk4/filterintervals' +include { GATK4_GERMLINECNVCALLER } from '../../../modules/nf-core/gatk4/germlinecnvcaller' +include { GATK4_INDEXFEATUREFILE as GATK4_INDEXFEATUREFILE_MAPPABILITY } from '../../../modules/nf-core/gatk4/indexfeaturefile' +include { GATK4_INDEXFEATUREFILE as GATK4_INDEXFEATUREFILE_SEGDUP } from '../../../modules/nf-core/gatk4/indexfeaturefile' +include { GATK4_INTERVALLISTTOOLS } from '../../../modules/nf-core/gatk4/intervallisttools' +include { GATK4_PREPROCESSINTERVALS } from '../../../modules/nf-core/gatk4/preprocessintervals' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index' + +workflow GERMLINECNVCALLER_COHORT { + take: + ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] + val_pon_name // string: 
[optional] name for panel of normals + ch_dict // channel: [optional] [ val(meta), path(dict) ] + ch_fai // channel: [optional] [ val(meta), path(fai) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_exclude_bed // channel: [optional] [ val(meta), path(bed) ] + ch_user_exclude_interval_list // channel: [optional] [ val(meta), path(intervals) ] + ch_mappable_regions // channel: [optional] [ val(meta), path(bed) ] + ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] + ch_segmental_duplications // channel: [optional] [ val(meta), path(bed) ] + ch_target_bed // channel: [optional] [ val(meta), path(bed) ] + ch_user_target_interval_list // channel: [optional] [ val(meta), path(intervals) ] + + main: + versions = channel.empty() + + // Prepare references + GATK4_INDEXFEATUREFILE_MAPPABILITY(ch_mappable_regions) + GATK4_INDEXFEATUREFILE_SEGDUP(ch_segmental_duplications) + + //Runs for wes analysis, when target_bed file is provided instead of target_interval_list + GATK4_BEDTOINTERVALLIST_TARGETS(ch_target_bed, ch_dict) + + //Runs for wes analysis, when exclude_bed file is provided instead of exclude_interval_list + GATK4_BEDTOINTERVALLIST_EXCLUDE(ch_exclude_bed, ch_dict) + + ch_user_target_interval_list + .combine(GATK4_BEDTOINTERVALLIST_TARGETS.out.interval_list.ifEmpty(null)) + .branch { it -> + intervallistfrompath: it[2].equals(null) + return [it[0], it[1]] + intervallistfrombed: !it[2].equals(null) + return [it[2], it[3]] + } + .set { ch_targets_for_mix } + + ch_targets_for_mix.intervallistfrompath + .mix(ch_targets_for_mix.intervallistfrombed) + .collect() + .set { ch_target_interval_list } + + ch_user_exclude_interval_list + .combine(GATK4_BEDTOINTERVALLIST_EXCLUDE.out.interval_list.ifEmpty(null)) + .branch { it -> + intervallistfrompath: it[2].equals(null) + return [it[0], it[1]] + intervallistfrombed: !it[2].equals(null) + return [it[2], it[3]] + } + .set { ch_exclude_for_mix } + + ch_exclude_for_mix.intervallistfrompath + 
.mix(ch_exclude_for_mix.intervallistfrombed) + .collect() + .set { ch_exclude_interval_list } + + GATK4_PREPROCESSINTERVALS( + ch_fasta, + ch_fai, + ch_dict, + ch_target_interval_list, + ch_exclude_interval_list, + ) + + GATK4_ANNOTATEINTERVALS( + GATK4_PREPROCESSINTERVALS.out.interval_list, + ch_fasta, + ch_fai, + ch_dict, + ch_mappable_regions, + GATK4_INDEXFEATUREFILE_MAPPABILITY.out.index.ifEmpty([[:], []]), + ch_segmental_duplications, + GATK4_INDEXFEATUREFILE_SEGDUP.out.index.ifEmpty([[:], []]), + ) + + // Filter out files that lack indices, and generate them + ch_input + .branch { meta, alignment, index -> + alignment_with_index: index.size() > 0 + return [meta, alignment, index] + alignment_without_index: index.size() == 0 + return [meta, alignment] + } + .set { ch_for_mix } + + SAMTOOLS_INDEX(ch_for_mix.alignment_without_index) + + SAMTOOLS_INDEX.out.bai + .mix(SAMTOOLS_INDEX.out.crai) + .set { ch_index } + + // Collect alignment files and their indices + ch_for_mix.alignment_without_index + .join(ch_index) + .mix(ch_for_mix.alignment_with_index) + .combine(GATK4_PREPROCESSINTERVALS.out.interval_list.map { it -> it[1] }) + .set { ch_readcounts_in } + + // Collect read counts, and generate models + GATK4_COLLECTREADCOUNTS( + ch_readcounts_in, + ch_fasta, + ch_fai, + ch_dict, + ) + + GATK4_COLLECTREADCOUNTS.out.tsv + .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) + .collect { it[1] } + .map { tsv -> [[id: val_pon_name], tsv] } + .set { ch_readcounts_out } + + + GATK4_FILTERINTERVALS( + GATK4_PREPROCESSINTERVALS.out.interval_list, + ch_readcounts_out, + GATK4_ANNOTATEINTERVALS.out.annotated_intervals, + ) + + GATK4_INTERVALLISTTOOLS(GATK4_FILTERINTERVALS.out.interval_list).interval_list.map { _meta, it -> it }.flatten().set { ch_intervallist_out } + + ch_readcounts_out + .combine(GATK4_FILTERINTERVALS.out.interval_list) + .map { meta, counts, _meta2, il -> [meta, counts, il, []] } + .set { ch_contigploidy_in } + + GATK4_DETERMINEGERMLINECONTIGPLOIDY( + 
ch_contigploidy_in, + [[:], []], + ch_ploidy_priors, + ) + + ch_readcounts_out + .combine(ch_intervallist_out) + .combine(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.calls) + .map { meta, counts, il, _meta2, calls -> [meta + [id: il.baseName], counts, il, calls, []] } + .set { ch_cnvcaller_in } + + GATK4_GERMLINECNVCALLER(ch_cnvcaller_in) + + versions = versions.mix(SAMTOOLS_INDEX.out.versions) + versions = versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) + versions = versions.mix(GATK4_BEDTOINTERVALLIST_TARGETS.out.versions) + versions = versions.mix(GATK4_BEDTOINTERVALLIST_EXCLUDE.out.versions) + versions = versions.mix(GATK4_COLLECTREADCOUNTS.out.versions) + versions = versions.mix(GATK4_ANNOTATEINTERVALS.out.versions) + versions = versions.mix(GATK4_FILTERINTERVALS.out.versions) + versions = versions.mix(GATK4_INDEXFEATUREFILE_MAPPABILITY.out.versions) + versions = versions.mix(GATK4_INDEXFEATUREFILE_SEGDUP.out.versions) + versions = versions.mix(GATK4_INTERVALLISTTOOLS.out.versions) + versions = versions.mix(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.versions) + versions = versions.mix(GATK4_GERMLINECNVCALLER.out.versions) + + emit: + cnvmodel = GATK4_GERMLINECNVCALLER.out.cohortmodel + ploidymodel = GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.model + readcounts = ch_readcounts_out + versions +} diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf new file mode 100644 index 0000000..c52af73 --- /dev/null +++ b/subworkflows/local/prepare_genome/main.nf @@ -0,0 +1,78 @@ +include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary' +include { GATK4_PREPROCESSINTERVALS as GATK4_PREPROCESSINTERVALS_GENS } from '../../../modules/nf-core/gatk4/preprocessintervals' +include { GAWK as BUILD_INTERVALS } from '../../../modules/nf-core/gawk' +include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx' + +// Prepare references +workflow PREPARE_GENOME { + take: + fasta // 
channel: [mandatory] [ val(meta), path(fasta) ] + user_dict // channel: [optional] [ val(meta), path(dict) ] + user_fai // channel: [optional] [ val(meta), path(fai) ] + user_gens_interval_list // channel: [optional] [ val(meta), path(gens_interval_list) ] + user_mutect2_target_bed // channel: [optional] [ val(meta), path(mutect2_target_bed) ] + tools // string: [mandatory] comma-separated list of tools + + main: + dict = channel.empty() + fai = channel.empty() + gens_interval_list = channel.empty() + mutect2_target_bed = channel.empty() + versions = channel.empty() + + // If a user_dict is provided, no fasta will be used to generate a dict + // Otherwise, GATK4_CREATESEQUENCEDICTIONARY will be run to generate a dict + fasta_for_dict = fasta + .mix(user_dict) + .groupTuple() + .filter { _meta, files -> !files[1] } + + GATK4_CREATESEQUENCEDICTIONARY(fasta_for_dict) + + dict = user_dict.mix(GATK4_CREATESEQUENCEDICTIONARY.out.dict).collect() + + // If a user_fai is provided, no fasta will be used to generate a fai + // Otherwise, SAMTOOLS_FAIDX will be run to generate a fai + fasta_for_fai = fasta + .mix(user_fai) + .groupTuple() + .filter { _meta, files -> !files[1] } + + SAMTOOLS_FAIDX(fasta_for_fai, [[:], []], false) + + fai = user_fai.mix(SAMTOOLS_FAIDX.out.fai).collect() + + // If a user_gens_interval_list is provided or if gens is not one of the specified tools, no fasta will be used to generate an interval list + // Otherwise, GATK4_PREPROCESSINTERVALS_GENS will be run to generate an interval list + fasta_for_interval_list = fasta + .mix(user_gens_interval_list) + .groupTuple() + .filter { _meta, files -> (tools.split(',').contains('gens') && !files[1]) } + + GATK4_PREPROCESSINTERVALS_GENS(fasta_for_interval_list, fai.collect(), dict.collect(), [[:], []], [[:], []]) + + gens_interval_list = user_gens_interval_list.mix(GATK4_PREPROCESSINTERVALS_GENS.out.interval_list).collect() + + // If a user_mutect2_target_bed is provided or if mutect2 is not one of the specified tools, no fai will be used to 
generate a target bed + // Otherwise, BUILD_INTERVALS will be run to generate a target bed + fai_for_intervals = fai + .mix(user_mutect2_target_bed) + .groupTuple() + .filter { _meta, files -> (tools.split(',').contains('mutect2') && !files[1]) } + + BUILD_INTERVALS(fai_for_intervals, [], false) + + mutect2_target_bed = user_mutect2_target_bed.mix(BUILD_INTERVALS.out.output).collect() + + versions = versions.mix(BUILD_INTERVALS.out.versions) + versions = versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) + versions = versions.mix(GATK4_PREPROCESSINTERVALS_GENS.out.versions) + versions = versions.mix(SAMTOOLS_FAIDX.out.versions) + + emit: + dict // channel: [ val(meta), path(dict) ] + fai // channel: [ val(meta), path(fai) ] + gens_interval_list // channel: [ val(meta), path(gens_interval_list) ] + mutect2_target_bed // channel: [ val(meta), path(mutect2_target_bed) ] + versions // channel: [ path(versions.yml)] +} diff --git a/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf b/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf index 90719c0..7ec8e41 100644 --- a/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf @@ -8,14 +8,15 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { samplesheetToList } from 'plugin/nf-schema' -include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' -include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' -include { imNotification } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { UTILS_NFSCHEMA_PLUGIN } from 
'../../nf-core/utils_nfschema_plugin' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { samplesheetToList } from 'plugin/nf-schema' +include { paramsHelp } from 'plugin/nf-schema' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -24,42 +25,69 @@ include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipelin */ workflow PIPELINE_INITIALISATION { - take: - version // boolean: Display version and exit - validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not use coloured log outputs + version // boolean: Display version and exit + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs nextflow_cli_args // array: List of positional nextflow CLI args - outdir // string: The output directory where the results will be saved - input // string: Path to input samplesheet + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + help // boolean: Display help message and exit + help_full // boolean: Show the full help message + show_hidden // boolean: Show hidden parameters in the help message main: - ch_versions = Channel.empty() + ch_versions = channel.empty() // // Print version and exit if required and dump pipeline parameters to JSON file // - UTILS_NEXTFLOW_PIPELINE ( + UTILS_NEXTFLOW_PIPELINE( version, true, outdir, - workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() 
>= 1 + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1, ) // // Validate parameters and generate parameter summary to stdout // - UTILS_NFSCHEMA_PLUGIN ( + before_text = """ +-\033[2m----------------------------------------------------\033[0m- + \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m +\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m +\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m +\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m + \033[0;32m`._,._,\'\033[0m +\033[0;35m nf-core/createpanelrefs ${workflow.manifest.version}\033[0m +-\033[2m----------------------------------------------------\033[0m- +""" + after_text = """${workflow.manifest.doi ? "\n* The pipeline\n" : ""}${workflow.manifest.doi.tokenize(",").collect { doi -> " https://doi.org/${doi.trim().replace('https://doi.org/','')}"}.join("\n")}${workflow.manifest.doi ? "\n" : ""} +* The nf-core framework + https://doi.org/10.1038/s41587-020-0439-x + +* Software dependencies + https://github.com/nf-core/createpanelrefs/blob/main/CITATIONS.md +""" + command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + + UTILS_NFSCHEMA_PLUGIN( workflow, validate_params, - null + null, + help, + help_full, + show_hidden, + before_text, + after_text, + command, ) // // Check config provided to the pipeline // - UTILS_NFCORE_PIPELINE ( + UTILS_NFCORE_PIPELINE( nextflow_cli_args ) @@ -69,28 +97,10 @@ workflow PIPELINE_INITIALISATION { validateInputParameters() // - // Create channel from input file provided through params.input + // Create channel from input file provided through input // - Channel - .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) - .map { - meta, fastq_1, fastq_2 -> - if (!fastq_2) { - return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] - } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] - } - } - .groupTuple() - .map { 
samplesheet -> - validateInputSamplesheet(samplesheet) - } - .map { - meta, fastqs -> - return [ meta, fastqs.flatten() ] - } - .set { ch_samplesheet } + ch_samplesheet = channel.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input.json")) emit: samplesheet = ch_samplesheet @@ -104,15 +114,14 @@ workflow PIPELINE_INITIALISATION { */ workflow PIPELINE_COMPLETION { - take: - email // string: email address - email_on_fail // string: email address sent on pipeline failure + email // string: email address + email_on_fail // string: email address sent on pipeline failure plaintext_email // boolean: Send plain-text email instead of HTML - outdir // path: Path to output directory where results will be published + outdir // path: Path to output directory where results will be published monochrome_logs // boolean: Disable ANSI colour codes in log output - hook_url // string: hook URL for notifications - multiqc_report // string: Path to MultiQC report + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report main: summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") @@ -141,7 +150,7 @@ workflow PIPELINE_COMPLETION { } workflow.onError { - log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + log.error("Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting") } } @@ -157,27 +166,13 @@ def validateInputParameters() { genomeExistsError() } -// -// Validate channels from input samplesheet -// -def validateInputSamplesheet(input) { - def (metas, fastqs) = input[1..2] - - // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end - def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1 - if (!endedness_ok) { - error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. 
single-end or paired-end: ${metas[0].id}") - } - - return [ metas[0], fastqs ] -} // // Get attribute from genome config file e.g. fasta // def getGenomeAttribute(attribute) { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] + if (params.genomes[params.genome].containsKey(attribute)) { + return params.genomes[params.genome][attribute] } } return null @@ -188,11 +183,7 @@ def getGenomeAttribute(attribute) { // def genomeExistsError() { if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + " Currently, the available genome keys are:\n" + " ${params.genomes.keySet().join(", ")}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" error(error_string) } } @@ -204,11 +195,10 @@ def toolCitationText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() + "Tools used in the workflow included:", + "MultiQC (Ewels et al. 
2016)", + ".", + ].join(' ').trim() return citation_text } @@ -218,9 +208,8 @@ def toolBibliographyText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ - "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", - "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " - ].join(' ').trim() + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " + ].join(' ').trim() return reference_text } @@ -242,7 +231,10 @@ def methodsDescriptionText(mqc_methods_yaml) { temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " } meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) - } else meta["doi_text"] = "" + } + else { + meta["doi_text"] = "" + } meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " // Tool references @@ -256,7 +248,7 @@ def methodsDescriptionText(mqc_methods_yaml) { def methods_text = mqc_methods_yaml.text - def engine = new groovy.text.SimpleTemplateEngine() + def engine = new groovy.text.SimpleTemplateEngine() def description_html = engine.createTemplate(methods_text).make(meta) return description_html.toString() diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf b/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf new file mode 100644 index 0000000..f700c9f --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf @@ -0,0 +1,69 @@ +// +// Run GATK mutect2, genomicsdbimport and createsomaticpanelofnormals +// + +include { GATK4_CREATESOMATICPANELOFNORMALS } from '../../../modules/nf-core/gatk4/createsomaticpanelofnormals' +include { GATK4_GENOMICSDBIMPORT } from '../../../modules/nf-core/gatk4/genomicsdbimport' +include { GATK4_MUTECT2 } from '../../../modules/nf-core/gatk4/mutect2' + +workflow BAM_CREATE_SOM_PON_GATK { + take: + ch_mutect2_in // channel: [ val(meta), path(input), path(input_index), path(interval_file) ] + ch_fasta // channel: [ val(meta), path(fasta) ] + ch_fai // channel: [ val(meta), path(fai), path(gzi) ] + ch_dict // channel: [ val(meta), path(dict) ] + val_pon_norm // string: name for panel of normals + ch_gendb_intervals // channel: [ path(interval_file) ] + + main: + ch_versions = channel.empty() + + // + // Perform variant calling for each sample using mutect2 module in panel of normals mode + // + GATK4_MUTECT2( + ch_mutect2_in, + ch_fasta, + ch_fai, + ch_dict, + [], + [], + [], + [], + [], + [], + ) + ch_versions = ch_versions.mix(GATK4_MUTECT2.out.versions) + + // + // Convert all sample vcfs into a genomicsdb workspace using genomicsdbimport + // + ch_vcf = GATK4_MUTECT2.out.vcf.collect { _meta, vcf -> [vcf] }.toList() + ch_index = GATK4_MUTECT2.out.tbi.collect { _meta, tbi -> [tbi] }.toList() + ch_dict_gendb = ch_dict.map { _meta, dict -> [dict] }.toList() + + 
ch_gendb_input = channel.of([id: val_pon_norm]) + .combine(ch_vcf) + .combine(ch_index) + .combine(ch_gendb_intervals) + .combine(ch_dict_gendb) + .map { meta, vcf, tbi, interval, dict -> [meta, vcf, tbi, interval, [], dict] } + + GATK4_GENOMICSDBIMPORT(ch_gendb_input, false, false, false) + ch_versions = ch_versions.mix(GATK4_GENOMICSDBIMPORT.out.versions) + + // + // Panel of normals made from genomicsdb workspace using createsomaticpanelofnormals + // + GATK4_CREATESOMATICPANELOFNORMALS(GATK4_GENOMICSDBIMPORT.out.genomicsdb, ch_fasta, ch_fai.map { meta, fai, _gzi -> [meta, fai] }, ch_dict) + ch_versions = ch_versions.mix(GATK4_CREATESOMATICPANELOFNORMALS.out.versions) + + emit: + mutect2_vcf = GATK4_MUTECT2.out.vcf // channel: [ val(meta), path(vcf) ] + mutect2_index = GATK4_MUTECT2.out.tbi // channel: [ val(meta), path(tbi) ] + mutect2_stats = GATK4_MUTECT2.out.stats // channel: [ val(meta), path(stats) ] + genomicsdb = GATK4_GENOMICSDBIMPORT.out.genomicsdb // channel: [ val(meta), path(genomicsdb) ] + pon_vcf = GATK4_CREATESOMATICPANELOFNORMALS.out.vcf // channel: [ val(meta), path(vcf) ] + pon_index = GATK4_CREATESOMATICPANELOFNORMALS.out.tbi // channel: [ val(meta), path(tbi) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml b/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml new file mode 100644 index 0000000..2660836 --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml @@ -0,0 +1,69 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_create_som_pon_gatk +description: Perform variant calling on a set of normal samples using mutect2 panel of normals mode. Group them into a genomicsdbworkspace using genomicsdbimport, then use this to create a panel of normals using createsomaticpanelofnormals. 
+keywords: + - gatk4 + - mutect2 + - genomicsdbimport + - createsomaticpanelofnormals + - variant_calling + - genomicsdb_workspace + - panel_of_normals +components: + - gatk4/mutect2 + - gatk4/genomicsdbimport + - gatk4/createsomaticpanelofnormals +input: + - ch_mutect2_in: + type: list + description: | + An input channel containing the following files: + - input: One or more BAM/CRAM files + - input_index: The index/indices from the BAM/CRAM file(s) + - interval_file: An interval file to be used with the mutect call + Structure: [ meta, input, input_index, interval_file ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - mutect2_vcf: + type: list + description: List of compressed vcf files to be used to make the gendb workspace + pattern: "[ *.vcf.gz ]" + - mutect2_index: + type: list + description: List of indexes of mutect2_vcf files + pattern: "[ *vcf.gz.tbi ]" + - mutect2_stats: + type: list + description: List of stats files that pair with mutect2_vcf files + pattern: "[ *vcf.gz.stats ]" + - genomicsdb: + type: directory + description: Directory containing the files that compose the genomicsdb workspace. 
+ pattern: "path/name_of_workspace" + - pon_vcf: + type: file + description: Panel of normal as compressed vcf file + pattern: "*.vcf.gz" + - pon_index: + type: file + description: Index of pon_vcf file + pattern: "*vcf.gz.tbi" +authors: + - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test deleted file mode 100644 index 68718e4..0000000 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test +++ /dev/null @@ -1,54 +0,0 @@ - -nextflow_function { - - name "Test Functions" - script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" - config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" - tag 'subworkflows' - tag 'utils_nextflow_pipeline' - tag 'subworkflows/utils_nextflow_pipeline' - - test("Test Function getWorkflowVersion") { - - function "getWorkflowVersion" - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - test("Test Function dumpParametersToJSON") { - - function "dumpParametersToJSON" - - when { - function { - """ - // define inputs of the function here. 
Example: - input[0] = "$outputDir" - """.stripIndent() - } - } - - then { - assertAll( - { assert function.success } - ) - } - } - - test("Test Function checkCondaChannels") { - - function "checkCondaChannels" - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } -} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap deleted file mode 100644 index e3f0baf..0000000 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap +++ /dev/null @@ -1,20 +0,0 @@ -{ - "Test Function getWorkflowVersion": { - "content": [ - "v9.9.9" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:02:05.308243" - }, - "Test Function checkCondaChannels": { - "content": null, - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:02:12.425833" - } -} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test deleted file mode 100644 index 02dbf09..0000000 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test +++ /dev/null @@ -1,113 +0,0 @@ -nextflow_workflow { - - name "Test Workflow UTILS_NEXTFLOW_PIPELINE" - script "../main.nf" - config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" - workflow "UTILS_NEXTFLOW_PIPELINE" - tag 'subworkflows' - tag 'utils_nextflow_pipeline' - tag 'subworkflows/utils_nextflow_pipeline' - - test("Should run no inputs") { - - when { - workflow { - """ - print_version = false - dump_parameters = false - outdir = null - check_conda_channels = false - - input[0] = print_version - input[1] = dump_parameters - input[2] = outdir - input[3] = check_conda_channels - """ - } - } - - then { - assertAll( - { assert 
workflow.success } - ) - } - } - - test("Should print version") { - - when { - workflow { - """ - print_version = true - dump_parameters = false - outdir = null - check_conda_channels = false - - input[0] = print_version - input[1] = dump_parameters - input[2] = outdir - input[3] = check_conda_channels - """ - } - } - - then { - expect { - with(workflow) { - assert success - assert "nextflow_workflow v9.9.9" in stdout - } - } - } - } - - test("Should dump params") { - - when { - workflow { - """ - print_version = false - dump_parameters = true - outdir = 'results' - check_conda_channels = false - - input[0] = false - input[1] = true - input[2] = outdir - input[3] = false - """ - } - } - - then { - assertAll( - { assert workflow.success } - ) - } - } - - test("Should not create params JSON if no output directory") { - - when { - workflow { - """ - print_version = false - dump_parameters = true - outdir = null - check_conda_channels = false - - input[0] = false - input[1] = true - input[2] = outdir - input[3] = false - """ - } - } - - then { - assertAll( - { assert workflow.success } - ) - } - } -} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config deleted file mode 100644 index a09572e..0000000 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config +++ /dev/null @@ -1,9 +0,0 @@ -manifest { - name = 'nextflow_workflow' - author = """nf-core""" - homePage = 'https://127.0.0.1' - description = """Dummy pipeline""" - nextflowVersion = '!>=23.04.0' - version = '9.9.9' - doi = 'https://doi.org/10.5281/zenodo.5070524' -} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml deleted file mode 100644 index f847611..0000000 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/utils_nextflow_pipeline: - - 
subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index bfd2587..2f30e9a 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -98,7 +98,7 @@ def workflowVersionToYAML() { // Get channel of software versions used in pipeline in YAML format // def softwareVersionsToYAML(ch_versions) { - return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML())) + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(channel.of(workflowVersionToYAML())) } // diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test deleted file mode 100644 index f117040..0000000 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test +++ /dev/null @@ -1,126 +0,0 @@ - -nextflow_function { - - name "Test Functions" - script "../main.nf" - config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" - tag "subworkflows" - tag "subworkflows_nfcore" - tag "utils_nfcore_pipeline" - tag "subworkflows/utils_nfcore_pipeline" - - test("Test Function checkConfigProvided") { - - function "checkConfigProvided" - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - test("Test Function checkProfileProvided") { - - function "checkProfileProvided" - - when { - function { - """ - input[0] = [] - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - test("Test Function without logColours") { - - function "logColours" - - when { - function { - """ - input[0] = true - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert 
snapshot(function.result).match() } - ) - } - } - - test("Test Function with logColours") { - function "logColours" - - when { - function { - """ - input[0] = false - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - test("Test Function getSingleReport with a single file") { - function "getSingleReport" - - when { - function { - """ - input[0] = file(params.modules_testdata_base_path + '/generic/tsv/test.tsv', checkIfExists: true) - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert function.result.contains("test.tsv") } - ) - } - } - - test("Test Function getSingleReport with multiple files") { - function "getSingleReport" - - when { - function { - """ - input[0] = [ - file(params.modules_testdata_base_path + '/generic/tsv/test.tsv', checkIfExists: true), - file(params.modules_testdata_base_path + '/generic/tsv/network.tsv', checkIfExists: true), - file(params.modules_testdata_base_path + '/generic/tsv/expression.tsv', checkIfExists: true) - ] - """ - } - } - - then { - assertAll( - { assert function.success }, - { assert function.result.contains("test.tsv") }, - { assert !function.result.contains("network.tsv") }, - { assert !function.result.contains("expression.tsv") } - ) - } - } -} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap deleted file mode 100644 index 02c6701..0000000 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap +++ /dev/null @@ -1,136 +0,0 @@ -{ - "Test Function checkProfileProvided": { - "content": null, - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:03.360873" - }, - "Test Function checkConfigProvided": { - "content": [ - true - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:02:59.729647" - }, - 
"Test Function without logColours": { - "content": [ - { - "reset": "", - "bold": "", - "dim": "", - "underlined": "", - "blink": "", - "reverse": "", - "hidden": "", - "black": "", - "red": "", - "green": "", - "yellow": "", - "blue": "", - "purple": "", - "cyan": "", - "white": "", - "bblack": "", - "bred": "", - "bgreen": "", - "byellow": "", - "bblue": "", - "bpurple": "", - "bcyan": "", - "bwhite": "", - "ublack": "", - "ured": "", - "ugreen": "", - "uyellow": "", - "ublue": "", - "upurple": "", - "ucyan": "", - "uwhite": "", - "iblack": "", - "ired": "", - "igreen": "", - "iyellow": "", - "iblue": "", - "ipurple": "", - "icyan": "", - "iwhite": "", - "biblack": "", - "bired": "", - "bigreen": "", - "biyellow": "", - "biblue": "", - "bipurple": "", - "bicyan": "", - "biwhite": "" - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:17.969323" - }, - "Test Function with logColours": { - "content": [ - { - "reset": "\u001b[0m", - "bold": "\u001b[1m", - "dim": "\u001b[2m", - "underlined": "\u001b[4m", - "blink": "\u001b[5m", - "reverse": "\u001b[7m", - "hidden": "\u001b[8m", - "black": "\u001b[0;30m", - "red": "\u001b[0;31m", - "green": "\u001b[0;32m", - "yellow": "\u001b[0;33m", - "blue": "\u001b[0;34m", - "purple": "\u001b[0;35m", - "cyan": "\u001b[0;36m", - "white": "\u001b[0;37m", - "bblack": "\u001b[1;30m", - "bred": "\u001b[1;31m", - "bgreen": "\u001b[1;32m", - "byellow": "\u001b[1;33m", - "bblue": "\u001b[1;34m", - "bpurple": "\u001b[1;35m", - "bcyan": "\u001b[1;36m", - "bwhite": "\u001b[1;37m", - "ublack": "\u001b[4;30m", - "ured": "\u001b[4;31m", - "ugreen": "\u001b[4;32m", - "uyellow": "\u001b[4;33m", - "ublue": "\u001b[4;34m", - "upurple": "\u001b[4;35m", - "ucyan": "\u001b[4;36m", - "uwhite": "\u001b[4;37m", - "iblack": "\u001b[0;90m", - "ired": "\u001b[0;91m", - "igreen": "\u001b[0;92m", - "iyellow": "\u001b[0;93m", - "iblue": "\u001b[0;94m", - "ipurple": "\u001b[0;95m", - "icyan": "\u001b[0;96m", - 
"iwhite": "\u001b[0;97m", - "biblack": "\u001b[1;90m", - "bired": "\u001b[1;91m", - "bigreen": "\u001b[1;92m", - "biyellow": "\u001b[1;93m", - "biblue": "\u001b[1;94m", - "bipurple": "\u001b[1;95m", - "bicyan": "\u001b[1;96m", - "biwhite": "\u001b[1;97m" - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:21.714424" - } -} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test deleted file mode 100644 index 8940d32..0000000 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test +++ /dev/null @@ -1,29 +0,0 @@ -nextflow_workflow { - - name "Test Workflow UTILS_NFCORE_PIPELINE" - script "../main.nf" - config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" - workflow "UTILS_NFCORE_PIPELINE" - tag "subworkflows" - tag "subworkflows_nfcore" - tag "utils_nfcore_pipeline" - tag "subworkflows/utils_nfcore_pipeline" - - test("Should run without failures") { - - when { - workflow { - """ - input[0] = [] - """ - } - } - - then { - assertAll( - { assert workflow.success }, - { assert snapshot(workflow.out).match() } - ) - } - } -} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap deleted file mode 100644 index 859d103..0000000 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap +++ /dev/null @@ -1,19 +0,0 @@ -{ - "Should run without failures": { - "content": [ - { - "0": [ - true - ], - "valid_config": [ - true - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:25.726491" - } -} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config deleted 
file mode 100644 index d0a926b..0000000 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config +++ /dev/null @@ -1,9 +0,0 @@ -manifest { - name = 'nextflow_workflow' - author = """nf-core""" - homePage = 'https://127.0.0.1' - description = """Dummy pipeline""" - nextflowVersion = '!>=23.04.0' - version = '9.9.9' - doi = 'https://doi.org/10.5281/zenodo.5070524' -} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml deleted file mode 100644 index ac8523c..0000000 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/utils_nfcore_pipeline: - - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf index 4994303..ee4738c 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/main.nf +++ b/subworkflows/nf-core/utils_nfschema_plugin/main.nf @@ -4,6 +4,7 @@ include { paramsSummaryLog } from 'plugin/nf-schema' include { validateParameters } from 'plugin/nf-schema' +include { paramsHelp } from 'plugin/nf-schema' workflow UTILS_NFSCHEMA_PLUGIN { @@ -15,29 +16,56 @@ workflow UTILS_NFSCHEMA_PLUGIN { // when this input is empty it will automatically use the configured schema or // "${projectDir}/nextflow_schema.json" as default. 
This input should not be empty // for meta pipelines + help // boolean: show help message + help_full // boolean: show full help message + show_hidden // boolean: show hidden parameters in help message + before_text // string: text to show before the help message and parameters summary + after_text // string: text to show after the help message and parameters summary + command // string: an example command of the pipeline main: + if(help || help_full) { + help_options = [ + beforeText: before_text, + afterText: after_text, + command: command, + showHidden: show_hidden, + fullHelp: help_full, + ] + if(parameters_schema) { + help_options << [parametersSchema: parameters_schema] + } + log.info paramsHelp( + help_options, + params.help instanceof String ? params.help : "", + ) + exit 0 + } + // // Print parameter summary to stdout. This will display the parameters // that differ from the default given in the JSON schema // + + summary_options = [:] if(parameters_schema) { - log.info paramsSummaryLog(input_workflow, parameters_schema:parameters_schema) - } else { - log.info paramsSummaryLog(input_workflow) + summary_options << [parametersSchema: parameters_schema] } + log.info before_text + log.info paramsSummaryLog(summary_options, input_workflow) + log.info after_text // // Validate the parameters using nextflow_schema.json or the schema // given via the validation.parametersSchema configuration option // if(validate_params) { + validateOptions = [:] if(parameters_schema) { - validateParameters(parameters_schema:parameters_schema) - } else { - validateParameters() + validateOptions << [parametersSchema: parameters_schema] } + validateParameters(validateOptions) } emit: diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test deleted file mode 100644 index 8fb3016..0000000 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test +++ /dev/null @@ -1,117 +0,0 @@ 
-nextflow_workflow { - - name "Test Subworkflow UTILS_NFSCHEMA_PLUGIN" - script "../main.nf" - workflow "UTILS_NFSCHEMA_PLUGIN" - - tag "subworkflows" - tag "subworkflows_nfcore" - tag "subworkflows/utils_nfschema_plugin" - tag "plugin/nf-schema" - - config "./nextflow.config" - - test("Should run nothing") { - - when { - - params { - test_data = '' - } - - workflow { - """ - validate_params = false - input[0] = workflow - input[1] = validate_params - input[2] = "" - """ - } - } - - then { - assertAll( - { assert workflow.success } - ) - } - } - - test("Should validate params") { - - when { - - params { - test_data = '' - outdir = null - } - - workflow { - """ - validate_params = true - input[0] = workflow - input[1] = validate_params - input[2] = "" - """ - } - } - - then { - assertAll( - { assert workflow.failed }, - { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } } - ) - } - } - - test("Should run nothing - custom schema") { - - when { - - params { - test_data = '' - } - - workflow { - """ - validate_params = false - input[0] = workflow - input[1] = validate_params - input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" - """ - } - } - - then { - assertAll( - { assert workflow.success } - ) - } - } - - test("Should validate params - custom schema") { - - when { - - params { - test_data = '' - outdir = null - } - - workflow { - """ - validate_params = true - input[0] = workflow - input[1] = validate_params - input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" - """ - } - } - - then { - assertAll( - { assert workflow.failed }, - { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } } - ) - } - } -} diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config deleted file mode 100644 index 
0907ac5..0000000 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config +++ /dev/null @@ -1,8 +0,0 @@ -plugins { - id "nf-schema@2.1.0" -} - -validation { - parametersSchema = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" - monochromeLogs = true -} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json deleted file mode 100644 index 331e0d2..0000000 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", - "title": ". pipeline parameters", - "description": "", - "type": "object", - "$defs": { - "input_output_options": { - "title": "Input/output options", - "type": "object", - "fa_icon": "fas fa-terminal", - "description": "Define where the pipeline should find input data and save output data.", - "required": ["outdir"], - "properties": { - "validate_params": { - "type": "boolean", - "description": "Validate parameters?", - "default": true, - "hidden": true - }, - "outdir": { - "type": "string", - "format": "directory-path", - "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" - }, - "test_data_base": { - "type": "string", - "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", - "description": "Base for test data directory", - "hidden": true - }, - "test_data": { - "type": "string", - "description": "Fake test data param", - "hidden": true - } - } - }, - "generic_options": { - "title": "Generic options", - "type": "object", - "fa_icon": "fas fa-file-import", - "description": "Less common options for the pipeline, typically set in a config file.", - "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", - "properties": { - "help": { - "type": "boolean", - "description": "Display help text.", - "fa_icon": "fas fa-question-circle", - "hidden": true - }, - "version": { - "type": "boolean", - "description": "Display version and exit.", - "fa_icon": "fas fa-question-circle", - "hidden": true - }, - "logo": { - "type": "boolean", - "default": true, - "description": "Display nf-core logo in console output.", - "fa_icon": "fas fa-image", - "hidden": true - }, - "singularity_pull_docker_container": { - "type": "boolean", - "description": "Pull Singularity container from Docker?", - "hidden": true - }, - "publish_dir_mode": { - "type": "string", - "default": "copy", - "description": "Method used to save pipeline results to output directory.", - "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", - "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], - "hidden": true - }, - "monochrome_logs": { - "type": "boolean", - "description": "Use monochrome_logs", - "hidden": true - } - } - } - }, - "allOf": [ - { - "$ref": "#/$defs/input_output_options" - }, - { - "$ref": "#/$defs/generic_options" - } - ] -} diff --git a/tests/.nftignore b/tests/.nftignore index c10bc1f..a3d1717 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -1,10 +1,45 @@ .DS_Store +gatk4/*.{vcf.gz,vcf.gz.tbi} +gatk4/test/* +gatk4/test/** +gens_pon/createreadcountpanelofnormals/gens_pon.hdf5 +germlinecnvcaller/determinegermlinecontigploidy/cohort-model/mu_psi_j_log__.tsv +germlinecnvcaller/determinegermlinecontigploidy/cohort-model/std_psi_j_log__.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/baseline_copy_number_t.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/log_c_emission_tc.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/log_q_c_tc.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/mu_denoised_copy_ratio_t.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/mu_psi_s_log__.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/mu_read_depth_s_log__.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/mu_z_su.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/sample_name.txt +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/std_denoised_copy_ratio_t.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/std_psi_s_log__.tsv 
+germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/std_read_depth_s_log__.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/std_z_su.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/log_q_tau_tk.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/mu_W_tu.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/mu_ard_u_interval__.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/mu_ard_u_log__.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/mu_log_mean_bias_t.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/mu_psi_t_log__.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/std_W_tu.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/std_ard_u_interval__.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/std_ard_u_log__.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/std_log_mean_bias_t.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/std_psi_t_log__.tsv +germlinecnvcaller/readcounts/*.hdf5 multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt +multiqc/multiqc_data/llms-full.txt multiqc/multiqc_data/multiqc.log +multiqc/multiqc_data/multiqc.parquet multiqc/multiqc_data/multiqc_data.json -multiqc/multiqc_data/multiqc_sources.txt +multiqc/multiqc_data/multiqc_general_stats.txt multiqc/multiqc_data/multiqc_software_versions.txt +multiqc/multiqc_data/multiqc_sources.txt multiqc/multiqc_plots/{svg,pdf,png}/*.{svg,pdf,png} multiqc/multiqc_report.html -fastqc/*_fastqc.{html,zip} pipeline_info/*.{html,json,txt,yml} 
+references/genome.dict +references/intervals/gens_pon/genome.interval_list diff --git a/tests/csv/1.0.0/bam.csv b/tests/csv/1.0.0/bam.csv new file mode 100644 index 0000000..47a938f --- /dev/null +++ b/tests/csv/1.0.0/bam.csv @@ -0,0 +1,3 @@ +sample,bam,bai +normal,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai +tumour,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai diff --git a/tests/csv/1.0.0/bam_cram.csv b/tests/csv/1.0.0/bam_cram.csv new file mode 100644 index 0000000..a10b04c --- /dev/null +++ b/tests/csv/1.0.0/bam_cram.csv @@ -0,0 +1,5 @@ +sample,bam,cram +sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam +sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam +sample3,,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram +sample4,,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram diff --git a/tests/csv/1.0.0/bam_sorted.csv b/tests/csv/1.0.0/bam_sorted.csv new file mode 100644 index 0000000..f3fd62d --- /dev/null +++ b/tests/csv/1.0.0/bam_sorted.csv @@ -0,0 +1,3 @@ +sample,bam,bai 
+testN,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai +testT,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai diff --git a/tests/csv/1.0.0/cram.csv b/tests/csv/1.0.0/cram.csv new file mode 100644 index 0000000..9aaa6b0 --- /dev/null +++ b/tests/csv/1.0.0/cram.csv @@ -0,0 +1,3 @@ +sample,cram,crai +sample3,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram.crai +sample4,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram.crai diff --git a/tests/default.nf.test b/tests/default.nf.test index b8bd0fd..c15a368 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -3,11 +3,13 @@ nextflow_pipeline { name "Test pipeline" script "../main.nf" tag "pipeline" + tag "pipeline_createpanelrefs" - test("-profile test") { + test("-profile test --tools cnvkit --input tests/csv/1.0.0/bam_cram.csv") { when { params { + input = "${projectDir}/tests/csv/1.0.0/bam_cram.csv" outdir = "$outputDir" } } @@ -17,13 +19,11 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: 
['pipeline_info/*.{html,json,txt}']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assert workflow.success assertAll( - { assert workflow.success}, { assert snapshot( - // Number of successful tasks - workflow.trace.succeeded().size(), - // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml"), + // pipeline versions.yml file for multiqc from which Nextflow and pipeline versions are removed (all from the workflow key) + removeFromYamlMap("${outputDir}/pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", "Workflow"), // All stable path name, with a relative path stable_name, // All files with stable contents diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap new file mode 100644 index 0000000..6f1540f --- /dev/null +++ b/tests/default.nf.test.snap @@ -0,0 +1,58 @@ +{ + "-profile test --tools cnvkit --input tests/csv/1.0.0/bam_cram.csv": { + "content": [ + { + "CNVKIT_BATCH": { + "cnvkit": "0.9.12" + } + }, + [ + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", + "reference", + "reference/cnvkit", + "reference/cnvkit/panel.cnn", + "reference/cnvkit/sample3.antitargetcoverage.cnn", + "reference/cnvkit/sample3.targetcoverage.cnn", + "reference/cnvkit/sample4.antitargetcoverage.cnn", + "reference/cnvkit/sample4.targetcoverage.cnn", + 
"reference/cnvkit/test.paired_end.recalibrated.sorted.antitargetcoverage.cnn", + "reference/cnvkit/test.paired_end.recalibrated.sorted.targetcoverage.cnn", + "reference/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn", + "reference/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn", + "samtools", + "samtools/sample3.bam", + "samtools/sample4.bam" + ], + [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "panel.cnn:md5,bb11c7ec8b2a5679fcabc4dbfa83294a", + "sample3.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "sample3.targetcoverage.cnn:md5,814200aceed64f3e0c4a69dab64553c4", + "sample4.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "sample4.targetcoverage.cnn:md5,ae3bfc49096f86e48c37bc9b997982fb", + "test.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,814200aceed64f3e0c4a69dab64553c4", + "test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test2.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,ae3bfc49096f86e48c37bc9b997982fb", + "sample3.bam:md5,945810b0063a00721d75ff8fbfec2e82", + "sample4.bam:md5,03417a2d44a2da7a2b7ae3be276c273e" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-31T14:28:40.413006402" + } +} \ No newline at end of file diff --git a/tests/gens_pon.config b/tests/gens_pon.config new file mode 100644 index 0000000..6f069fd --- /dev/null +++ b/tests/gens_pon.config @@ -0,0 +1,12 @@ +process { + withName: 'GATK4_CREATEREADCOUNTPANELOFNORMALS' { + ext.args = "--minimum-interval-median-percentile 10 --number-of-eigensamples 2" + } + +} + +profiles { + docker { + docker.runOptions = '-u root' + } +} diff --git a/tests/gens_pon.nf.test b/tests/gens_pon.nf.test new file mode 100644 index 0000000..fed4001 --- /dev/null +++ b/tests/gens_pon.nf.test @@ -0,0 +1,77 @@ 
+nextflow_pipeline { + + name "Test pipeline | gens" + script "../main.nf" + tag "pipeline" + tag "pipeline_createpanelrefs" + config "./gens_pon.config" + + test("-profile test --tools gens --input tests/csv/1.0.0/bam_sorted.csv --gens_bin_length 100 --gens_pon_name gens_pon --gens_readcount_format TSV") { + + when { + params { + genome = 'GRCh38.chr22.testdata' + gens_bin_length = 100 + gens_interval_list = null + gens_pon_name = 'gens_pon' + gens_readcount_format = "TSV" + input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" + outdir = "$outputDir" + tools = 'gens' + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assert workflow.success + assertAll( + { assert snapshot( + // pipeline versions.yml file for multiqc from which Nextflow and pipeline versions are removed (all from the workflow key) + removeFromYamlMap("${outputDir}/pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", "Workflow"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } + + test("-profile test --tools gens --input tests/csv/1.0.0/bam_sorted.csv --gens_bin_length 100 --gens_pon_name gens_pon --gens_readcount_format TSV --gens_analysis_type lrs") { + + when { + params { + genome = 'GRCh38.chr22.testdata' + gens_analysis_type = 'lrs' + gens_bin_length = 100 + gens_interval_list = null + gens_pon_name = 'gens_pon' + gens_readcount_format = "TSV" + input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" + outdir = "$outputDir" + tools = 'gens' + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = 
getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assert workflow.success + assertAll( + { assert snapshot( + // pipeline versions.yml file for multiqc from which Nextflow and pipeline versions are removed (all from the workflow key) + removeFromYamlMap("${outputDir}/pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", "Workflow"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/gens_pon.nf.test.snap b/tests/gens_pon.nf.test.snap new file mode 100644 index 0000000..1e76976 --- /dev/null +++ b/tests/gens_pon.nf.test.snap @@ -0,0 +1,106 @@ +{ + "-profile test --tools gens --input tests/csv/1.0.0/bam_sorted.csv --gens_bin_length 100 --gens_pon_name gens_pon --gens_readcount_format TSV --gens_analysis_type lrs": { + "content": [ + { + "GATK4_CREATEREADCOUNTPANELOFNORMALS": { + "gatk4": "4.6.2.0" + }, + "GATK4_PREPROCESSINTERVALS_GENS": { + "gatk4": "4.6.2.0" + }, + "INTERVAL_LIST_TO_BED": { + "gawk": "5.3.0" + }, + "MOSDEPTH_GATK_FORMAT": { + "gawk": "5.3.0" + }, + "MOSDEPTH_GATK_HEADER": { + "gawk": "5.3.0" + }, + "SAMTOOLS_VIEW": { + "samtools": "1.22.1" + } + }, + [ + "gens_pon", + "gens_pon/createreadcountpanelofnormals", + "gens_pon/createreadcountpanelofnormals/gens_pon.hdf5", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "pipeline_info", + 
"pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", + "references", + "references/intervals", + "references/intervals/gens_pon", + "references/intervals/gens_pon/genome.interval_list", + "references/intervals/gens_pon/gens_coverage_bins.bed" + ], + [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "gens_coverage_bins.bed:md5,b5c7f328aaf419595302baaa16f5b649" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.12.0" + }, + "timestamp": "2026-02-06T14:48:19.423593928" + }, + "-profile test --tools gens --input tests/csv/1.0.0/bam_sorted.csv --gens_bin_length 100 --gens_pon_name gens_pon --gens_readcount_format TSV": { + "content": [ + { + "GATK4_COLLECTREADCOUNTS": { + "gatk4": "4.6.2.0" + }, + "GATK4_CREATEREADCOUNTPANELOFNORMALS": { + "gatk4": "4.6.2.0" + }, + "GATK4_PREPROCESSINTERVALS_GENS": { + "gatk4": "4.6.2.0" + } + }, + [ + "gens_pon", + "gens_pon/createreadcountpanelofnormals", + "gens_pon/createreadcountpanelofnormals/gens_pon.hdf5", + "gens_pon/readcounts", + "gens_pon/readcounts/testN.tsv", + "gens_pon/readcounts/testT.tsv", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", + "references", + "references/intervals", + "references/intervals/gens_pon", + "references/intervals/gens_pon/genome.interval_list" + ], + [ + "testN.tsv:md5,bfceab35109b04317f38f9cc1cde71ca", + "testT.tsv:md5,7141d08cdc26f6057557be9e23ef4365", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.12.0" + }, + "timestamp": "2026-02-06T14:49:45.524921421" + } 
+} \ No newline at end of file diff --git a/tests/germlinecnvcaller_cohort.config b/tests/germlinecnvcaller_cohort.config new file mode 100644 index 0000000..defe024 --- /dev/null +++ b/tests/germlinecnvcaller_cohort.config @@ -0,0 +1,3 @@ +env { + THEANO_FLAGS='base_compiledir=.' +} diff --git a/tests/germlinecnvcaller_cohort.nf.test b/tests/germlinecnvcaller_cohort.nf.test new file mode 100644 index 0000000..c316dd0 --- /dev/null +++ b/tests/germlinecnvcaller_cohort.nf.test @@ -0,0 +1,40 @@ +nextflow_pipeline { + + name "Test pipeline | germlinecnvcaller" + script "../main.nf" + tag "pipeline" + tag "pipeline_createpanelrefs" + config "./germlinecnvcaller_cohort.config" + + test("-profile test --tools germlinecnvcaller --input tests/csv/1.0.0/bam_sorted.csv --gcnv_model_name cohort --gcnv_ploidy_priors --gcnv_scatter_content 2") { + + when { + params { + genome = 'GRCh38.chr22.testdata' + gcnv_model_name = 'cohort' + gcnv_scatter_content = 2 + input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" + outdir = "$outputDir" + tools = 'germlinecnvcaller' + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assert workflow.success + assertAll( + { assert snapshot( + // pipeline versions.yml file for multiqc from which Nextflow and pipeline versions are removed (all from the workflow key) + removeFromYamlMap("${outputDir}/pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", "Workflow"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/germlinecnvcaller_cohort.nf.test.snap b/tests/germlinecnvcaller_cohort.nf.test.snap 
new file mode 100644 index 0000000..90d9d7b --- /dev/null +++ b/tests/germlinecnvcaller_cohort.nf.test.snap @@ -0,0 +1,180 @@ +{ + "-profile test --tools germlinecnvcaller --input tests/csv/1.0.0/bam_sorted.csv --gcnv_model_name cohort --gcnv_ploidy_priors --gcnv_scatter_content 2": { + "content": [ + { + "GATK4_ANNOTATEINTERVALS": { + "gatk4": "4.6.2.0" + }, + "GATK4_COLLECTREADCOUNTS": { + "gatk4": "4.6.2.0" + }, + "GATK4_DETERMINEGERMLINECONTIGPLOIDY": { + "gatk4": "4.6.2.0" + }, + "GATK4_FILTERINTERVALS": { + "gatk4": "4.6.2.0" + }, + "GATK4_GERMLINECNVCALLER": { + "gatk4": "4.6.2.0" + }, + "GATK4_INTERVALLISTTOOLS": { + "gatk4": "4.6.2.0" + }, + "GATK4_PREPROCESSINTERVALS": { + "gatk4": "4.6.2.0" + } + }, + [ + "germlinecnvcaller", + "germlinecnvcaller/determinegermlinecontigploidy", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/contig_ploidy_prior.tsv", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/gcnvkernel_version.json", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/interval_list.tsv", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/mu_mean_bias_j_interval__.tsv", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/mu_psi_j_log__.tsv", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/ploidy_config.json", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/std_mean_bias_j_interval__.tsv", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/std_psi_j_log__.tsv", + "germlinecnvcaller/germlinecnvcaller", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/baseline_copy_number_t.tsv", + 
"germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/log_c_emission_tc.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/log_q_c_tc.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/mu_denoised_copy_ratio_t.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/mu_psi_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/mu_read_depth_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/mu_z_su.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/sample_name.txt", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/std_denoised_copy_ratio_t.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/std_psi_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/std_read_depth_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/std_z_su.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/baseline_copy_number_t.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/log_c_emission_tc.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/log_q_c_tc.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/mu_denoised_copy_ratio_t.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/mu_psi_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/mu_read_depth_s_log__.tsv", + 
"germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/mu_z_su.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/sample_name.txt", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/std_denoised_copy_ratio_t.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/std_psi_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/std_read_depth_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/std_z_su.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/calling_config.json", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/denoising_config.json", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/gcnvkernel_version.json", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/interval_list.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/calling_config.json", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/denoising_config.json", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/gcnvkernel_version.json", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/interval_list.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/log_q_tau_tk.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/mu_W_tu.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/mu_ard_u_interval__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/mu_log_mean_bias_t.tsv", + 
"germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/mu_psi_t_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/std_W_tu.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/std_ard_u_interval__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/std_log_mean_bias_t.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/std_psi_t_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/baseline_copy_number_t.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/log_c_emission_tc.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/log_q_c_tc.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/mu_denoised_copy_ratio_t.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/mu_psi_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/mu_read_depth_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/mu_z_su.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/sample_name.txt", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/std_denoised_copy_ratio_t.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/std_psi_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/std_read_depth_s_log__.tsv", + 
"germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/std_z_su.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/baseline_copy_number_t.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/log_c_emission_tc.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/log_q_c_tc.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/mu_denoised_copy_ratio_t.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/mu_psi_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/mu_read_depth_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/mu_z_su.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/sample_name.txt", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/std_denoised_copy_ratio_t.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/std_psi_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/std_read_depth_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/std_z_su.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/calling_config.json", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/denoising_config.json", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/gcnvkernel_version.json", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/interval_list.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model", + 
"germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/calling_config.json", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/denoising_config.json", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/gcnvkernel_version.json", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/interval_list.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/log_q_tau_tk.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/mu_W_tu.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/mu_ard_u_interval__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/mu_log_mean_bias_t.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/mu_psi_t_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/std_W_tu.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/std_ard_u_interval__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/std_log_mean_bias_t.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/std_psi_t_log__.tsv", + "germlinecnvcaller/readcounts", + "germlinecnvcaller/readcounts/testN.hdf5", + "germlinecnvcaller/readcounts/testT.hdf5", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml" + ], + [ + "contig_ploidy_prior.tsv:md5,7a2f5444b09a1f635a540bbcd23176cf", + 
"gcnvkernel_version.json:md5,aa0ad91e0ff23be94e09e909cf2286d8", + "interval_list.tsv:md5,8c5aaf57cf34ff35b183178a87a9f864", + "mu_mean_bias_j_interval__.tsv:md5,27ae705b7a002517873357ba7ecde39f", + "ploidy_config.json:md5,4cdc16109826fa7d3cdfd1dc8758ec27", + "std_mean_bias_j_interval__.tsv:md5,0816459105b443e6f266e725f298f0b9", + "calling_config.json:md5,dc78889e3d0ba1adf02d10db0194128e", + "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", + "gcnvkernel_version.json:md5,aa0ad91e0ff23be94e09e909cf2286d8", + "interval_list.tsv:md5,0cba6efc5fa465f7930c901916a3e764", + "calling_config.json:md5,dc78889e3d0ba1adf02d10db0194128e", + "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", + "gcnvkernel_version.json:md5,aa0ad91e0ff23be94e09e909cf2286d8", + "interval_list.tsv:md5,0cba6efc5fa465f7930c901916a3e764", + "calling_config.json:md5,dc78889e3d0ba1adf02d10db0194128e", + "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", + "gcnvkernel_version.json:md5,aa0ad91e0ff23be94e09e909cf2286d8", + "interval_list.tsv:md5,9f7718372e87e69aeb837678755a9971", + "calling_config.json:md5,dc78889e3d0ba1adf02d10db0194128e", + "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", + "gcnvkernel_version.json:md5,aa0ad91e0ff23be94e09e909cf2286d8", + "interval_list.tsv:md5,9f7718372e87e69aeb837678755a9971", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.12.0" + }, + "timestamp": "2026-02-06T15:12:44.760563402" + } +} \ No newline at end of file diff --git a/tests/mutect2.nf.test b/tests/mutect2.nf.test new file mode 100644 index 0000000..d8208e3 --- /dev/null +++ b/tests/mutect2.nf.test @@ -0,0 +1,66 @@ +nextflow_pipeline { + + name "Test MUTECT2_PON" + script "main.nf" + tag "MUTECT2" + + test("-profile test --tools mutect2 --mutect2_pon_name test") { + + when { + params { + mutect2_pon_name = 'test' + outdir = "$outputDir" + tools = 'mutect2' + } + } + + then { + // stable_name: 
All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'gatk4/test/**/*']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assert workflow.success + assertAll( + { assert snapshot( + // pipeline versions.yml file for multiqc from which Nextflow and pipeline versions are removed (all from the workflow key) + removeFromYamlMap("${outputDir}/pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", "Workflow"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } + + test("-profile test --tools mutect2 --mutect2_target_bed null --mutect2_pon_name test --input tests/csv/1.0.0/cram.csv") { + + when { + params { + input = "${projectDir}/tests/csv/1.0.0/cram.csv" + mutect2_pon_name = 'test' + mutect2_target_bed = null + outdir = "$outputDir" + tools = 'mutect2' + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'gatk4/test/**/*']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assert workflow.success + assertAll( + { assert snapshot( + // pipeline versions.yml file for multiqc from which Nextflow and pipeline versions are removed (all from the workflow key) + removeFromYamlMap("${outputDir}/pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", "Workflow"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/mutect2.nf.test.snap 
b/tests/mutect2.nf.test.snap new file mode 100644 index 0000000..eba04d6 --- /dev/null +++ b/tests/mutect2.nf.test.snap @@ -0,0 +1,119 @@ +{ + "-profile test --tools mutect2 --mutect2_target_bed null --mutect2_pon_name test --input tests/csv/1.0.0/cram.csv": { + "content": [ + { + "BUILD_INTERVALS": { + "gawk": "5.3.0" + }, + "GATK4_CREATESOMATICPANELOFNORMALS": { + "gatk4": "4.6.2.0" + }, + "GATK4_GENOMICSDBIMPORT": { + "gatk4": "4.6.2.0" + }, + "GATK4_MUTECT2": { + "gatk4": "4.6.2.0" + } + }, + [ + "gatk4", + "gatk4/sample3.vcf.gz", + "gatk4/sample3.vcf.gz.stats", + "gatk4/sample3.vcf.gz.tbi", + "gatk4/sample4.vcf.gz", + "gatk4/sample4.vcf.gz.stats", + "gatk4/sample4.vcf.gz.tbi", + "gatk4/test", + "gatk4/test.vcf.gz", + "gatk4/test.vcf.gz.tbi", + "gatk4/test/__tiledb_workspace.tdb", + "gatk4/test/callset.json", + "gatk4/test/chr21$1$46709983", + "gatk4/test/vcfheader.vcf", + "gatk4/test/vidmap.json", + "intervals", + "intervals/mutect2_target_bed", + "intervals/mutect2_target_bed/genome.bed", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml" + ], + [ + "sample3.vcf.gz.stats:md5,a05ace4138fc5cb993ed912d654ec22d", + "sample4.vcf.gz.stats:md5,080e6d0e254e582dfb9d5916c9637391", + "genome.bed:md5,472d213cfcde96565699779d5bfc0e32", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-31T15:11:32.603760513" + }, + "-profile test --tools mutect2 --mutect2_pon_name test": { + "content": [ + { + "GATK4_CREATESOMATICPANELOFNORMALS": { + "gatk4": "4.6.2.0" 
+ }, + "GATK4_GENOMICSDBIMPORT": { + "gatk4": "4.6.2.0" + }, + "GATK4_MUTECT2": { + "gatk4": "4.6.2.0" + } + }, + [ + "gatk4", + "gatk4/normal.vcf.gz", + "gatk4/normal.vcf.gz.stats", + "gatk4/normal.vcf.gz.tbi", + "gatk4/test", + "gatk4/test.vcf.gz", + "gatk4/test.vcf.gz.tbi", + "gatk4/test/__tiledb_workspace.tdb", + "gatk4/test/callset.json", + "gatk4/test/chr21$2$23354000", + "gatk4/test/chr21$24132500$24910998", + "gatk4/test/chr21$25689498$46709983", + "gatk4/test/vcfheader.vcf", + "gatk4/test/vidmap.json", + "gatk4/tumour.vcf.gz", + "gatk4/tumour.vcf.gz.stats", + "gatk4/tumour.vcf.gz.tbi", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml" + ], + [ + "normal.vcf.gz.stats:md5,b569ce66bbffe9588b3d221e821023ee", + "tumour.vcf.gz.stats:md5,76f749c53212d72e98801f6030fbf8a6", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.12.0" + }, + "timestamp": "2026-02-06T11:30:00.137671531" + } +} \ No newline at end of file diff --git a/tests/nextflow.config b/tests/nextflow.config index 70081af..fe14883 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -4,9 +4,9 @@ ======================================================================================== */ -// TODO nf-core: Specify any additional parameters here -// Or any resources requirements -params.modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' -params.pipelines_testdata_base_path = 
'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/createpanelrefs' +params { + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/createpanelrefs/' +} aws.client.anonymous = true // fixes S3 access issues on self-hosted runners diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index a670a30..e7b3cd9 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -1,97 +1,140 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS + IMPORT MODULES / SUBWORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_createpanelrefs_pipeline' -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ +include { BAM_CREATE_SOM_PON_GATK } from '../subworkflows/nf-core/bam_create_som_pon_gatk' +include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch' +include { GENS_PON } from '../subworkflows/local/gens_pon' +include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvcaller_cohort' +include { SAMTOOLS_VIEW } from '../modules/nf-core/samtools/view' workflow CREATEPANELREFS { - take: - ch_samplesheet // channel: samplesheet read in from --input + samplesheet // channel: 
samplesheet read in from --input + tools // array: tools to run, or no_tools if none (it's actually comma separated values string, but close enough) + gcnv_model_name // string: name of gcnv model + gens_analysis_type // string: type of analysis for gens pon ('lrs' or 'srs') + gens_pon_name // string: name of gens pon + mutect2_pon_name // string: name of mutect2 pon + fasta // channel: [meta, fasta] + dict // channel: [meta, dict] + fai // channel: [meta, fai] + cnvkit_targets // channel: [meta, cnvkit_targets] + gcnv_exclude_bed // channel: [meta, gcnv_exclude_bed] + gcnv_exclude_interval_list // channel: [meta, gcnv_exclude_interval_list] + gcnv_mappable_regions // channel: [meta, gcnv_mappable_regions] + gcnv_ploidy_priors // channel: [meta, gcnv_ploidy_priors] + gcnv_segmental_duplications // channel: [meta, gcnv_segmental_duplications] + gcnv_target_bed // channel: [meta, gcnv_target_bed] + gcnv_target_interval_list // channel: [meta, gcnv_target_interval_list] + gens_interval_list // channel: [meta, gens_interval_list] + mutect2_target_bed // channel: [meta, mutect2_target_bed] + main: + versions = channel.empty() + + if (tools.split(',').contains('cnvkit')) { - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() - // - // MODULE: Run FastQC - // - FASTQC ( - ch_samplesheet - ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - - // - // Collate and save software versions - // - softwareVersionsToYAML(ch_versions) - .collectFile( - storeDir: "${params.outdir}/pipeline_info", - name: 'nf_core_' + 'createpanelrefs_software_' + 'mqc_' + 'versions.yml', - sort: true, - newLine: true - ).set { ch_collated_versions } - - - // - // MODULE: MultiQC - // - ch_multiqc_config = Channel.fromPath( - "$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? 
- Channel.fromPath(params.multiqc_config, checkIfExists: true) : - Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? - Channel.fromPath(params.multiqc_logo, checkIfExists: true) : - Channel.empty() - - summary_params = paramsSummaryMap( - workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_files = ch_multiqc_files.mix( - ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? - file(params.multiqc_methods_description, checkIfExists: true) : - file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value( - methodsDescriptionText(ch_multiqc_custom_methods_description)) - - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix( - ch_methods_description.collectFile( - name: 'methods_description_mqc.yaml', - sort: true + input_by_fmt = samplesheet.branch { meta, bam, _bai, cram, crai -> + bam: bam + return [meta, bam] + cram: cram + return [meta, cram, crai] + } + + cnvkit_input = SAMTOOLS_VIEW(input_by_fmt.cram, fasta, [], "").bam + .mix(input_by_fmt.bam) + .map { meta, bam -> + return [meta + [id: 'panel'], bam] + } + .groupTuple() + .map { meta, bam -> + return [meta, [], bam] + } + + CNVKIT_BATCH(cnvkit_input, fasta, [[:], []], cnvkit_targets, [[:], []], true) + + versions = versions.mix(CNVKIT_BATCH.out.versions) + } + + if (tools.split(',').contains('germlinecnvcaller')) { + + germlinecnvcaller_input = samplesheet.map { meta, bam, bai, cram, crai -> + if (bam) { + return [meta + [data_type: 'bam'], bam, bai] + } + if (cram) { + return [meta + [data_type: 'cram'], cram, crai] + } + } + + GERMLINECNVCALLER_COHORT( + germlinecnvcaller_input, + gcnv_model_name, + dict, + fai, + fasta, + gcnv_exclude_bed, + gcnv_exclude_interval_list, + gcnv_mappable_regions, + 
gcnv_ploidy_priors, + gcnv_segmental_duplications, + gcnv_target_bed, + gcnv_target_interval_list, ) - ) - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList(), - [], - [] - ) + versions = versions.mix(GERMLINECNVCALLER_COHORT.out.versions) + } - emit:multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] + if (tools.split(',').contains('mutect2')) { -} + mutect2_input = samplesheet.map { meta, bam, bai, cram, crai -> + if (bam) { + return [meta + [data_type: 'bam'], bam, bai, []] + } + if (cram) { + return [meta + [data_type: 'cram'], cram, crai, []] + } + } -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ + BAM_CREATE_SOM_PON_GATK( + mutect2_input, + fasta, + fai.map { meta, fai_ -> [meta, fai_, []] }, + dict, + mutect2_pon_name, + mutect2_target_bed.map { _meta, target -> [target] }, + ) + + versions = versions.mix(BAM_CREATE_SOM_PON_GATK.out.versions) + } + + if (tools.split(',').contains('gens')) { + + gens_input = samplesheet.map { meta, bam, bai, cram, crai -> + if (bam) { + return [meta + [data_type: 'bam'], bam, bai] + } + if (cram) { + return [meta + [data_type: 'cram'], cram, crai] + } + } + + GENS_PON( + gens_input, + gens_analysis_type, + gens_pon_name, + dict, + fai, + fasta, + gens_interval_list, + ) + + versions = versions.mix(GENS_PON.out.versions) + } + + emit: + versions // channel: [ path(versions.yml) ] +}