diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 0000000..b290e09
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,20 @@
+{
+    "name": "nfcore",
+    "image": "nfcore/gitpod:latest",
+    "remoteUser": "gitpod",
+    "runArgs": ["--privileged"],
+
+    // Configure tool-specific properties.
+    "customizations": {
+        // Configure properties specific to VS Code.
+        "vscode": {
+            // Set *default* container specific settings.json values on container create.
+            "settings": {
+                "python.defaultInterpreterPath": "/opt/conda/bin/python"
+            },
+
+            // Add the IDs of extensions you want installed when the container is created.
+            "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"]
+        }
+    }
+}
diff --git a/.editorconfig b/.editorconfig
index 9554950..72dda28 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -8,12 +8,9 @@ trim_trailing_whitespace = true
 indent_size = 4
 indent_style = space

-[*.{yml,yaml}]
+[*.{md,yml,yaml,html,css,scss,js}]
 indent_size = 2

-[*.json]
-insert_final_newline = unset
-
 # These files are edited and tested upstream in nf-core/modules
 [/modules/nf-core/**]
 charset = unset
@@ -21,7 +18,16 @@ end_of_line = unset
 insert_final_newline = unset
 trim_trailing_whitespace = unset
 indent_style = unset
-indent_size = unset
+
+[/subworkflows/nf-core/**]
+charset = unset
+end_of_line = unset
+insert_final_newline = unset
+trim_trailing_whitespace = unset
+indent_style = unset

 [/assets/email*]
 indent_size = unset
+
+# ignore python and markdown
+[*.{py,md}]
+indent_style = unset
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 918a0ec..6724071 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -1,4 +1,4 @@
-# nf-core/radseq: Contributing Guidelines
+# `nf-core/radseq`: Contributing Guidelines

 Hi there! Many thanks for taking an interest in improving nf-core/radseq.

@@ -9,6 +9,7 @@ Please use the pre-filled template to save time.

 However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;)

+> [!NOTE]
 > If you need help using or modifying nf-core/radseq then the best place to ask is on the nf-core Slack [#radseq](https://nfcore.slack.com/channels/radseq) channel ([join our Slack here](https://nf-co.re/join/slack)).

 ## Contribution workflow

@@ -18,13 +19,19 @@ If you'd like to write some code for nf-core/radseq, the standard workflow is as

 1. Check that there isn't already an issue about your idea in the [nf-core/radseq issues](https://github.com/nf-core/radseq/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this
 2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/radseq repository](https://github.com/nf-core/radseq) to your GitHub account
 3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions)
-4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10).
+4. Use `nf-core pipelines schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10).
 5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged

 If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/).

 ## Tests

+You have the option to test your changes locally by running the pipeline. To receive warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command:
+
+```bash
+nf-test test --profile debug,test,docker --verbose
+```
+
 When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests.
 Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then.

@@ -33,7 +40,7 @@ There are typically two types of tests that run:

 ### Lint tests

 `nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to.
-To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint <pipeline-directory>` command.
+To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core pipelines lint <pipeline-directory>` command.

 If any failures or warnings are encountered, please follow the listed URL for more documentation.

@@ -48,9 +55,9 @@ These tests are run both with the latest available version of `Nextflow` and als

 :warning: Only in the unlikely and regretful event of a release happening with a bug.

-- On your own fork, make a new branch `patch` based on `upstream/master`.
+- On your own fork, make a new branch `patch` based on `upstream/main` or `upstream/master`.
 - Fix the bug, and bump version (X.Y.Z+1).
-- A PR should be made on `master` from patch to directly this particular bug.
+- Open a pull-request from `patch` to `main`/`master` with the changes.

 ## Getting help

@@ -58,17 +65,17 @@ For further information/help, please consult the [nf-core/radseq documentation](

 ## Pipeline contribution conventions

-To make the nf-core/radseq code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written.
+To make the `nf-core/radseq` code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written.

 ### Adding a new step

 If you wish to contribute a new step, please use the following coding standards:

-1. Define the corresponding input channel into your new process from the expected previous process channel
+1. Define the corresponding input channel into your new process from the expected previous process channel.
 2. Write the process block (see below).
 3. Define the output channel if needed (see below).
 4. Add any new parameters to `nextflow.config` with a default (see below).
-5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core schema build` tool).
+5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core pipelines schema build` tool).
 6. Add sanity checks and validation for all relevant parameters.
 7. Perform local tests to validate that the new code works as expected.
 8. If applicable, add a new test command in `.github/workflow/ci.yml`.

@@ -77,15 +84,15 @@ If you wish to contribute a new step, please use the following coding standards:

 ### Default values

-Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope.
+Parameters should be initialised / defined with default values within the `params` scope in `nextflow.config`.

-Once there, use `nf-core schema build` to add to `nextflow_schema.json`.
+Once there, use `nf-core pipelines schema build` to add to `nextflow_schema.json`.

 ### Default processes resource requirements

-Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels.
+Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/main/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels.

-The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block.
+The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block.

 ### Naming schemes

 Please use the following naming schemes, to make it easy to understand what is going where.

@@ -96,7 +103,7 @@ Please use the following naming schemes, to make it easy to understand what is g

 ### Nextflow version bumping

-If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]`
+If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core pipelines bump-version --nextflow . [min-nf-version]`

 ### Images and figures

@@ -116,4 +123,3 @@ To get started:

 Devcontainer specs:

 - [DevContainer config](.devcontainer/devcontainer.json)
-- [Dockerfile](.devcontainer/Dockerfile)
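For a contributor following the updated guidelines above, the renamed `nf-core pipelines ...` subcommands and the new `nf-test` suite chain together into a quick local check before opening a PR. A minimal sketch, assuming nf-core/tools and nf-test are installed in the active environment:

```bash
# local pre-PR loop using the commands documented in CONTRIBUTING.md above
nf-core pipelines schema build                        # sync new params into nextflow_schema.json
nf-core pipelines lint                                # check the pipeline against nf-core guidelines
nf-test test --profile debug,test,docker --verbose    # run the full test suite with debug warnings
```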
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index 8625fa0..5f37e20 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -42,9 +42,9 @@ body:
     attributes:
       label: System information
       description: |
-        * Nextflow version _(eg. 22.10.1)_
+        * Nextflow version _(eg. 23.04.0)_
         * Hardware _(eg. HPC, Desktop, Cloud)_
         * Executor _(eg. slurm, local, awsbatch)_
-        * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_
+        * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_
        * OS _(eg. CentOS Linux, macOS, Linux Mint)_
        * Version of nf-core/radseq _(eg. 1.1, 1.5, 1.8.2)_
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 64e25d7..4111fd5 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -15,9 +15,11 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/rads

 - [ ] This comment contains a description of changes (with reason).
 - [ ] If you've fixed a bug or added code that should be tested, add tests!
-- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/radseq/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/radseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
-- [ ] Make sure your code lints (`nf-core lint`).
+- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/radseq/tree/master/.github/CONTRIBUTING.md)
+- [ ] If necessary, also make a PR on the nf-core/radseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
+- [ ] Make sure your code lints (`nf-core pipelines lint`).
 - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
+- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`).
 - [ ] Usage Documentation in `docs/usage.md` is updated.
 - [ ] Output Documentation in `docs/output.md` is updated.
 - [ ] `CHANGELOG.md` is updated.
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
index 72da2f5..c494265 100644
--- a/.github/workflows/awsfulltest.yml
+++ b/.github/workflows/awsfulltest.yml
@@ -1,34 +1,69 @@
 name: nf-core AWS full size tests
-# This workflow is triggered on published releases.
+# This workflow is triggered on PRs opened against the main/master branch.
 # It can be additionally triggered manually with GitHub actions workflow dispatch button.
 # It runs the -profile 'test_full' on AWS batch

 on:
-  release:
-    types: [published]
+  pull_request:
+    branches:
+      - main
+      - master
   workflow_dispatch:
+  pull_request_review:
+    types: [submitted]
+
 jobs:
-  run-tower:
+  run-platform:
     name: Run AWS full tests
-    if: github.repository == 'nf-core/radseq'
+    # run only if the PR is approved by at least 2 reviewers and against the master branch or manually triggered
+    if: github.repository == 'nf-core/radseq' && github.event.review.state == 'approved' && github.event.pull_request.base.ref == 'master' || github.event_name == 'workflow_dispatch'
     runs-on: ubuntu-latest
     steps:
-      - name: Launch workflow via tower
-        uses: nf-core/tower-action@v2
+      - name: Get PR reviews
+        uses: octokit/request-action@v2.x
+        if: github.event_name != 'workflow_dispatch'
+        id: check_approvals
+        continue-on-error: true
+        with:
+          route: GET /repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/reviews?per_page=100
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Check for approvals
+        if: ${{ failure() && github.event_name != 'workflow_dispatch' }}
+        run: |
+          echo "No review approvals found. At least 2 approvals are required to run this action automatically."
+          exit 1
+
+      - name: Check for enough approvals (>=2)
+        id: test_variables
+        if: github.event_name != 'workflow_dispatch'
+        run: |
+          JSON_RESPONSE='${{ steps.check_approvals.outputs.data }}'
+          CURRENT_APPROVALS_COUNT=$(echo $JSON_RESPONSE | jq -c '[.[] | select(.state | contains("APPROVED")) ] | length')
+          test $CURRENT_APPROVALS_COUNT -ge 2 || exit 1 # At least 2 approvals are required
+
+      - name: Launch workflow via Seqera Platform
+        uses: seqeralabs/action-tower-launch@v2
         # TODO nf-core: You can customise AWS full pipeline tests as required
         # Add full size test data (but still relatively small datasets for few samples)
         # on the `test_full.config` test runs with only one set of parameters
         with:
           workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
           access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
           compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          pipeline: ${{ github.repository }}
           revision: ${{ github.sha }}
           workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/radseq/work-${{ github.sha }}
           parameters: |
             {
+              "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}",
               "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/radseq/results-${{ github.sha }}"
             }
-          profiles: test_full,aws_tower
-          pre_run_script: 'export NXF_VER=21.10.3'
+          profiles: test_full
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: Seqera Platform debug log file
+          path: |
+            seqera_platform_action_*.log
+            seqera_platform_action_*.json
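Besides the two-approval path, the full-size test keeps its `workflow_dispatch` trigger, so it can also be launched by hand. One possible invocation via the GitHub CLI (an illustrative assumption, not part of the patch; it requires write access to the repository and a ref that carries this workflow file):

```bash
# manually dispatch the full-size AWS test (hypothetical gh CLI invocation)
gh workflow run awsfulltest.yml --ref master
```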
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
index d94cac3..b725320 100644
--- a/.github/workflows/awstest.yml
+++ b/.github/workflows/awstest.yml
@@ -5,24 +5,29 @@ name: nf-core AWS test
 on:
   workflow_dispatch:
 jobs:
-  run-tower:
+  run-platform:
     name: Run AWS tests
     if: github.repository == 'nf-core/radseq'
     runs-on: ubuntu-latest
     steps:
-      - name: Launch workflow via tower
-        uses: nf-core/tower-action@v2
-
+      # Launch workflow using Seqera Platform CLI tool action
+      - name: Launch workflow via Seqera Platform
+        uses: seqeralabs/action-tower-launch@v2
         with:
           workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
           access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
           compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
-          pipeline: ${{ github.repository }}
           revision: ${{ github.sha }}
           workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/radseq/work-${{ github.sha }}
           parameters: |
             {
               "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/radseq/results-test-${{ github.sha }}"
             }
-          profiles: test,aws_tower
-          pre_run_script: 'export NXF_VER=21.10.3'
+          profiles: test
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: Seqera Platform debug log file
+          path: |
+            seqera_platform_action_*.log
+            seqera_platform_action_*.json
diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml
index 8f71233..d0b7cf4 100644
--- a/.github/workflows/branch.yml
+++ b/.github/workflows/branch.yml
@@ -1,28 +1,30 @@
 name: nf-core branch protection
-# This workflow is triggered on PRs to master branch on the repository
-# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev`
+# This workflow is triggered on PRs to `main`/`master` branch on the repository
+# It fails when someone tries to make a PR against the nf-core `main`/`master` branch instead of `dev`
 on:
   pull_request_target:
-    branches: [master]
+    branches:
+      - main
+      - master

 jobs:
   test:
     runs-on: ubuntu-latest
     steps:
-      # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches
+      # PRs to the nf-core repo main/master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches
       - name: Check PRs
         if: github.repository == 'nf-core/radseq'
         run: |
-          { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/radseq ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]
+          { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/radseq ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]

       # If the above check failed, post a comment on the PR explaining the failure
       # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets
       - name: Post PR comment
         if: failure()
-        uses: mshick/add-pr-comment@v1
+        uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2
         with:
           message: |
-            ## This PR is against the `master` branch :x:
+            ## This PR is against the `${{github.event.pull_request.base.ref}}` branch :x:

             * Do not close this PR
             * Click _Edit_ and change the `base` to `dev`

@@ -32,9 +34,9 @@ jobs:

             Hi @${{ github.event.pull_request.user.login }},

-            It looks like this pull-request is has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch.
-            The `master` branch on nf-core repositories should always contain code from the latest release.
-            Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch.
+            It looks like this pull-request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) ${{github.event.pull_request.base.ref}} branch.
+            The ${{github.event.pull_request.base.ref}} branch on nf-core repositories should always contain code from the latest release.
+            Because of this, PRs to ${{github.event.pull_request.base.ref}} are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch.

             You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page.
             Note that even after this, the test will continue to show as failing until you push a new commit.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b5d804e..3ec8bb3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -7,44 +7,79 @@ on:
   pull_request:
   release:
     types: [published]
+  workflow_dispatch:

 env:
   NXF_ANSI_LOG: false
-  CAPSULE_LOG: none
+  NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity
+  NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity
+
+concurrency:
+  group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
+  cancel-in-progress: true

 jobs:
   test:
-    name: Run workflow tests
+    name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }})"
     # Only run on push if this is the nf-core dev branch (merged PRs)
-    if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/radseq') }}
+    if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/radseq') }}"
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        # Nextflow versions
-        include:
-          # Test pipeline minimum Nextflow version
-          - NXF_VER: '21.10.3'
-            NXF_EDGE: ''
-          # Test latest edge release of Nextflow
-          - NXF_VER: ''
-            NXF_EDGE: '1'
+        NXF_VER:
+          - "24.04.2"
+          - "latest-everything"
+        profile:
+          - "conda"
+          - "docker"
+          - "singularity"
+        test_name:
+          - "test"
+        isMaster:
+          - ${{ github.base_ref == 'master' }}
+        # Exclude conda and singularity on dev
+        exclude:
+          - isMaster: false
+            profile: "conda"
+          - isMaster: false
+            profile: "singularity"
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@v2
-
-      - name: Install Nextflow
-        env:
-          NXF_VER: ${{ matrix.NXF_VER }}
-          # Uncomment only if the edge release is more recent than the latest stable release
-          # See https://github.com/nextflow-io/nextflow/issues/2467
-          # NXF_EDGE: ${{ matrix.NXF_EDGE }}
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+
+      - name: Set up Nextflow
+        uses: nf-core/setup-nextflow@v2
+        with:
+          version: "${{ matrix.NXF_VER }}"
+
+      - name: Set up Apptainer
+        if: matrix.profile == 'singularity'
+        uses: eWaterCycle/setup-apptainer@main
+
+      - name: Set up Singularity
+        if: matrix.profile == 'singularity'
         run: |
-          wget -qO- get.nextflow.io | bash
-          sudo mv nextflow /usr/local/bin/
+          mkdir -p $NXF_SINGULARITY_CACHEDIR
+          mkdir -p $NXF_SINGULARITY_LIBRARYDIR
+
+      - name: Set up Miniconda
+        if: matrix.profile == 'conda'
+        uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3
+        with:
+          miniconda-version: "latest"
+          auto-update-conda: true
+          conda-solver: libmamba
+          channels: conda-forge,bioconda
+
+      - name: Set up Conda
+        if: matrix.profile == 'conda'
+        run: |
+          echo $(realpath $CONDA)/condabin >> $GITHUB_PATH
+          echo $(realpath python) >> $GITHUB_PATH
+
+      - name: Clean up Disk space
+        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1

-      - name: Run pipeline with test data
-        # TODO nf-core: You can customise CI pipeline run tests as required
-        # For example: adding multiple test runs with different parameters
-        # Remember that you can parallelise this by using strategy.matrix
+      - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}"
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker
+          nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} --outdir ./results
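A single cell of the CI matrix above can be reproduced on a workstation by pinning the same Nextflow version and profile combination. A sketch, assuming Docker and Nextflow are installed locally:

```bash
# mirror one CI matrix cell (NXF_VER=24.04.2 | test | docker) locally
export NXF_VER=24.04.2                                 # pin the Nextflow version the matrix uses
nextflow run . -profile test,docker --outdir ./results
```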
diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml
new file mode 100644
index 0000000..0b6b1f2
--- /dev/null
+++ b/.github/workflows/clean-up.yml
@@ -0,0 +1,24 @@
+name: "Close user-tagged issues and PRs"
+on:
+  schedule:
+    - cron: "0 0 * * 0" # Once a week
+
+jobs:
+  clean-up:
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
+    steps:
+      - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9
+        with:
+          stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days."
+          stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful."
+          close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity."
+          days-before-stale: 30
+          days-before-close: 20
+          days-before-pr-close: -1
+          any-of-labels: "awaiting-changes,awaiting-feedback"
+          exempt-issue-labels: "WIP"
+          exempt-pr-labels: "WIP"
+          repo-token: "${{ secrets.GITHUB_TOKEN }}"
diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml
new file mode 100644
index 0000000..2576cc0
--- /dev/null
+++ b/.github/workflows/download_pipeline.yml
@@ -0,0 +1,121 @@
+name: Test successful pipeline download with 'nf-core pipelines download'
+
+# Run the workflow when:
+#  - dispatched manually
+#  - when a PR is opened or reopened to main/master branch
+#  - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev.
+on:
+  workflow_dispatch:
+    inputs:
+      testbranch:
+        description: "The specific branch you wish to utilize for the test execution of nf-core pipelines download."
+        required: true
+        default: "dev"
+  pull_request:
+    types:
+      - opened
+      - edited
+      - synchronize
+    branches:
+      - main
+      - master
+  pull_request_target:
+    branches:
+      - main
+      - master
+
+env:
+  NXF_ANSI_LOG: false
+
+jobs:
+  download:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Install Nextflow
+        uses: nf-core/setup-nextflow@v2
+
+      - name: Disk space cleanup
+        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+
+      - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5
+        with:
+          python-version: "3.12"
+          architecture: "x64"
+
+      - name: Setup Apptainer
+        uses: eWaterCycle/setup-apptainer@4bb22c52d4f63406c49e94c804632975787312b3 # v2.0.0
+        with:
+          apptainer-version: 1.3.4
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install git+https://github.com/nf-core/tools.git@dev
+
+      - name: Get the repository name and current branch set as environment variable
+        run: |
+          echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV}
+          echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV}
+          echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV}
+
+      - name: Make a cache directory for the container images
+        run: |
+          mkdir -p ./singularity_container_images
+
+      - name: Download the pipeline
+        env:
+          NXF_SINGULARITY_CACHEDIR: ./singularity_container_images
+        run: |
+          nf-core pipelines download ${{ env.REPO_LOWERCASE }} \
+            --revision ${{ env.REPO_BRANCH }} \
+            --outdir ./${{ env.REPOTITLE_LOWERCASE }} \
+            --compress "none" \
+            --container-system 'singularity' \
+            --container-library "quay.io" -l "docker.io" -l "community.wave.seqera.io/library/" \
+            --container-cache-utilisation 'amend' \
+            --download-configuration 'yes'
+
+      - name: Inspect download
+        run: tree ./${{ env.REPOTITLE_LOWERCASE }}
+
+      - name: Count the downloaded number of container images
+        id: count_initial
+        run: |
+          image_count=$(ls -1 ./singularity_container_images | wc -l | xargs)
+          echo "Initial container image count: $image_count"
+          echo "IMAGE_COUNT_INITIAL=$image_count" >> ${GITHUB_ENV}
+
+      - name: Run the downloaded pipeline (stub)
+        id: stub_run_pipeline
+        continue-on-error: true
+        env:
+          NXF_SINGULARITY_CACHEDIR: ./singularity_container_images
+          NXF_SINGULARITY_HOME_MOUNT: true
+        run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results
+
+      - name: Run the downloaded pipeline (stub run not supported)
+        id: run_pipeline
+        if: ${{ steps.stub_run_pipeline.outcome == 'failure' }}
+        env:
+          NXF_SINGULARITY_CACHEDIR: ./singularity_container_images
+          NXF_SINGULARITY_HOME_MOUNT: true
+        run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results
+
+      - name: Count the downloaded number of container images
+        id: count_afterwards
+        run: |
+          image_count=$(ls -1 ./singularity_container_images | wc -l | xargs)
+          echo "Post-pipeline run container image count: $image_count"
+          echo "IMAGE_COUNT_AFTER=$image_count" >> ${GITHUB_ENV}
+
+      - name: Compare container image counts
+        run: |
+          if [ "${{ env.IMAGE_COUNT_INITIAL }}" -ne "${{ env.IMAGE_COUNT_AFTER }}" ]; then
+            initial_count=${{ env.IMAGE_COUNT_INITIAL }}
+            final_count=${{ env.IMAGE_COUNT_AFTER }}
+            difference=$((final_count - initial_count))
+            echo "$difference additional container images were downloaded at runtime. The pipeline has no support for offline runs!"
+            tree ./singularity_container_images
+            exit 1
+          else
+            echo "The pipeline can be downloaded successfully!"
+          fi
diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml
new file mode 100644
index 0000000..92e1dbd
--- /dev/null
+++ b/.github/workflows/fix-linting.yml
@@ -0,0 +1,89 @@
+name: Fix linting from a comment
+on:
+  issue_comment:
+    types: [created]
+
+jobs:
+  fix-linting:
+    # Only run if comment is on a PR with the main repo, and if it contains the magic keywords
+    if: >
+      contains(github.event.comment.html_url, '/pull/') &&
+      contains(github.event.comment.body, '@nf-core-bot fix linting') &&
+      github.repository == 'nf-core/radseq'
+    runs-on: ubuntu-latest
+    steps:
+      # Use the @nf-core-bot token to check out so we can push later
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+        with:
+          token: ${{ secrets.nf_core_bot_auth_token }}
+
+      # indication that the linting is being fixed
+      - name: React on comment
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: eyes
+
+      # Action runs on the issue comment, so we don't get the PR by default
+      # Use the gh cli to check out the PR
+      - name: Checkout Pull Request
+        run: gh pr checkout ${{ github.event.issue.number }}
+        env:
+          GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }}
+
+      # Install and run pre-commit
+      - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5
+        with:
+          python-version: "3.12"
+
+      - name: Install pre-commit
+        run: pip install pre-commit
+
+      - name: Run pre-commit
+        id: pre-commit
+        run: pre-commit run --all-files
+        continue-on-error: true
+
+      # indication that the linting has finished
+      - name: react if linting finished successfully
+        if: steps.pre-commit.outcome == 'success'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: "+1"
+
+      - name: Commit & push changes
+        id: commit-and-push
+        if: steps.pre-commit.outcome == 'failure'
+        run: |
+          git config user.email "core@nf-co.re"
+          git config user.name "nf-core-bot"
+          git config push.default upstream
+          git add .
+          git status
+          git commit -m "[automated] Fix code linting"
+          git push
+
+      - name: react if linting errors were fixed
+        id: react-if-fixed
+        if: steps.commit-and-push.outcome == 'success'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: hooray
+
+      - name: react if linting errors were not fixed
+        if: steps.commit-and-push.outcome == 'failure'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: confused
+
+      - name: react if linting errors were not fixed
+        if: steps.commit-and-push.outcome == 'failure'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          issue-number: ${{ github.event.issue.number }}
+          body: |
+            @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually.
+            See [CI log](https://github.com/nf-core/radseq/actions/runs/${{ github.run_id }}) for more details.
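The offline-readiness check in `download_pipeline.yml` above can also be run by hand against a release candidate. A sketch using the same flags the workflow passes (revision and paths here are illustrative):

```bash
# hand-run the download test from download_pipeline.yml (illustrative revision/paths)
export NXF_SINGULARITY_CACHEDIR=./singularity_container_images
mkdir -p "$NXF_SINGULARITY_CACHEDIR"
nf-core pipelines download nf-core/radseq \
    --revision dev \
    --outdir ./radseq \
    --compress "none" \
    --container-system 'singularity' \
    --container-cache-utilisation 'amend' \
    --download-configuration 'yes'
```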
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index 858d622..dbd52d5 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -1,6 +1,6 @@
 name: nf-core linting
 # This workflow is triggered on pushes and PRs to the repository.
-# It runs the `nf-core lint` and markdown lint tests to ensure
+# It runs the `nf-core pipelines lint` and markdown lint tests to ensure
 # that the code meets the nf-core guidelines.
 on:
   push:
@@ -11,87 +11,62 @@ on:
     types: [published]

 jobs:
-  EditorConfig:
+  pre-commit:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4

-      - uses: actions/setup-node@v3
-
-      - name: Install editorconfig-checker
-        run: npm install -g editorconfig-checker
-
-      - name: Run ECLint check
-        run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile')
-
-  Prettier:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-
-      - uses: actions/setup-node@v3
-
-      - name: Install Prettier
-        run: npm install -g prettier
-
-      - name: Run Prettier --check
-        run: prettier --check ${GITHUB_WORKSPACE}
-
-  PythonBlack:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Check code lints with Black
-        uses: psf/black@stable
-
-      # If the above check failed, post a comment on the PR explaining the failure
-      - name: Post PR comment
-        if: failure()
-        uses: mshick/add-pr-comment@v1
+      - name: Set up Python 3.12
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5
         with:
-          message: |
-            ## Python linting (`black`) is failing
-
-            To keep the code consistent with lots of contributors, we run automated code consistency checks.
-            To fix this CI test, please run:
-
-            * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black`
-            * Fix formatting errors in your pipeline: `black .`
+          python-version: "3.12"

-            Once you push these changes the test should pass, and you can hide this comment :+1:
+      - name: Install pre-commit
+        run: pip install pre-commit

-            We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help!
-
-            Thanks again for your contribution!
-          repo-token: ${{ secrets.GITHUB_TOKEN }}
-          allow-repeats: false
+      - name: Run pre-commit
+        run: pre-commit run --all-files

   nf-core:
     runs-on: ubuntu-latest
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@v3
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4

       - name: Install Nextflow
-        uses: nf-core/setup-nextflow@v1
+        uses: nf-core/setup-nextflow@v2

-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5
         with:
-          python-version: "3.7"
+          python-version: "3.12"
           architecture: "x64"

+      - name: read .nf-core.yml
+        uses: pietrobolcato/action-read-yaml@1.1.0
+        id: read_yml
+        with:
+          config: ${{ github.workspace }}/.nf-core.yml
+
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install nf-core
+          pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }}
+
+      - name: Run nf-core pipelines lint
+        if: ${{ github.base_ref != 'master' }}
+        env:
+          GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }}
+        run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md

-      - name: Run nf-core lint
+      - name: Run nf-core pipelines lint --release
+        if: ${{ github.base_ref == 'master' }}
         env:
           GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }}
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }}
-        run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md
+        run: nf-core -l lint_log.txt pipelines lint --release --dir ${GITHUB_WORKSPACE} --markdown lint_results.md

       - name: Save PR number
         if: ${{ always() }}
@@ -99,7 +74,7 @@ jobs:

       - name: Upload linting log file artifact
         if: ${{ always() }}
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4
         with:
           name: linting-logs
           path: |
diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml
index 0bbcd30..0bed96d 100644
--- a/.github/workflows/linting_comment.yml
+++ b/.github/workflows/linting_comment.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Download lint results
-        uses: dawidd6/action-download-artifact@v2
+        uses: dawidd6/action-download-artifact@80620a5d27ce0ae443b965134db88467fc607b43 # v7
         with:
           workflow: linting.yml
           workflow_conclusion: completed
@@ -21,7 +21,7 @@ jobs:
         run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT

       - name: Post PR comment
-        uses: marocchino/sticky-pull-request-comment@v2
+        uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2
         with:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           number: ${{ steps.pr_number.outputs.pr_number }}
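Both lint jobs above can be reproduced locally before pushing, which avoids round-trips through CI. A sketch using the same tools the workflow installs (versions unpinned here for brevity; CI pins nf-core/tools via `.nf-core.yml`):

```bash
# run the same checks as the linting workflow (assumes pip is available)
pip install pre-commit nf-core
pre-commit run --all-files                            # prettier + editorconfig-checker hooks
nf-core -l lint_log.txt pipelines lint --dir . --markdown lint_results.md
```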
diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml
new file mode 100644
index 0000000..450b1d5
--- /dev/null
+++ b/.github/workflows/release-announcements.yml
@@ -0,0 +1,75 @@
+name: release-announcements
+# Automatic release toot and tweet announcements
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+
+jobs:
+  toot:
+    runs-on: ubuntu-latest
+    steps:
+      - name: get topics and convert to hashtags
+        id: get_topics
+        run: |
+          echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" | sed 's/-//g' >> $GITHUB_OUTPUT
+
+      - uses: rzr/fediverse-action@master
+        with:
+          access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }}
+          host: "mstdn.science" # custom host if not "mastodon.social" (default)
+          # GitHub event payload
+          # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release
+          message: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+            Please see the changelog: ${{ github.event.release.html_url }}
+
+            ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics
+
+  send-tweet:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5
+        with:
+          python-version: "3.10"
+      - name: Install dependencies
+        run: pip install tweepy==4.14.0
+      - name: Send tweet
+        shell: python
+        run: |
+          import os
+
+          import tweepy
+
+          client = tweepy.Client(
+              access_token=os.getenv("TWITTER_ACCESS_TOKEN"),
+              access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"),
+              consumer_key=os.getenv("TWITTER_CONSUMER_KEY"),
+              consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"),
+          )
+          tweet = os.getenv("TWEET")
+          client.create_tweet(text=tweet)
+        env:
+          TWEET: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+            Please see the changelog: ${{ github.event.release.html_url }}
+          TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }}
+          TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }}
+          TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
+          TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }}
+
+  bsky-post:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0
+        with:
+          post: |
+            Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+
+            Please see the changelog: ${{ github.event.release.html_url }}
+        env:
+          BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }}
+          BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }}
+          #
diff --git a/.github/workflows/template_version_comment.yml b/.github/workflows/template_version_comment.yml
new file mode 100644
index 0000000..537529b
--- /dev/null
+++ b/.github/workflows/template_version_comment.yml
@@ -0,0 +1,46 @@
+name: nf-core template version comment
+# This workflow is triggered on PRs to check if the pipeline template version matches the latest nf-core version.
+# It posts a comment to the PR, even if it comes from a fork.
+
+on: pull_request_target
+
+jobs:
+  template_version:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out pipeline code
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+
+      - name: Read template version from .nf-core.yml
+        uses: nichmor/minimal-read-yaml@v0.0.2
+        id: read_yml
+        with:
+          config: ${{ github.workspace }}/.nf-core.yml
+
+      - name: Install nf-core
+        run: |
+          python -m pip install --upgrade pip
+          pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }}
+
+      - name: Check nf-core outdated
+        id: nf_core_outdated
+        run: echo "OUTPUT=$(pip list --outdated | grep nf-core)" >> ${GITHUB_ENV}
+
+      - name: Post nf-core template version comment
+        uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2
+        if: |
+          contains(env.OUTPUT, 'nf-core')
+        with:
+          repo-token: ${{ secrets.NF_CORE_BOT_AUTH_TOKEN }}
+          allow-repeats: false
+          message: |
+            > [!WARNING]
+            > Newer version of the nf-core template is available.
+            >
+            > Your pipeline is using an old version of the nf-core template: ${{ steps.read_yml.outputs['nf_core_version'] }}.
+            > Please update your pipeline to the latest version.
+            >
+            > For more documentation on how to update your pipeline, please see the [nf-core documentation](https://github.com/nf-core/tools?tab=readme-ov-file#sync-a-pipeline-with-the-template) and [Synchronisation documentation](https://nf-co.re/docs/contributing/sync).
+#
diff --git a/.gitignore b/.gitignore
index 74b56c6..a42ce01 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,4 @@ results/
 testing/
 testing*
 *.pyc
-./work
+null/
diff --git a/.gitpod.yml b/.gitpod.yml
new file mode 100644
index 0000000..83599f6
--- /dev/null
+++ b/.gitpod.yml
@@ -0,0 +1,10 @@
+image: nfcore/gitpod:latest
+tasks:
+  - name: Update Nextflow and setup pre-commit
+    command: |
+      pre-commit install --install-hooks
+      nextflow self-update
+
+vscode:
+  extensions:
+    - nf-core.nf-core-extensionpack # https://github.com/nf-core/vscode-extensionpack
diff --git a/.nf-core.yml b/.nf-core.yml
index 655f7ad..f23f0ab 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -1,2 +1,11 @@
+nf_core_version: 3.1.0
 repository_type: pipeline
-org_path: ./modules
+template:
+  author: Gabriel Barrett
+  description: variant calling pipeline for radseq
+  force: false
+  is_nfcore: true
+  name: radseq
+  org: nf-core
+  outdir: .
+  version: 1.0dev
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..9e9f0e1
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,13 @@
+repos:
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: "v3.1.0"
+    hooks:
+      - id: prettier
+        additional_dependencies:
+          - prettier@3.2.5
+
+  - repo: https://github.com/editorconfig-checker/editorconfig-checker.python
+    rev: "3.0.3"
+    hooks:
+      - id: editorconfig-checker
+        alias: ec
diff --git a/.prettierignore b/.prettierignore
new file mode 100644
index 0000000..437d763
--- /dev/null
+++ b/.prettierignore
@@ -0,0 +1,12 @@
+email_template.html
+adaptivecard.json
+slackreport.json
+.nextflow*
+work/
+data/
+results/
+.DS_Store
+testing/
+testing*
+*.pyc
+bin/
diff --git a/.prettierrc.yml b/.prettierrc.yml
new file mode 100644
index 0000000..c81f9a7
--- /dev/null
+++ b/.prettierrc.yml
@@ -0,0 +1 @@
+printWidth: 120
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..a33b527
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "markdown.styles": ["public/vscode_markdown.css"]
+}
diff --git a/CITATIONS.md b/CITATIONS.md
index 5982683..3467bd1 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -8,70 +8,34 @@

 > Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311.

-## [dDocent](https://github.com/jpuritz/dDocent)
-
-> Puritz JB, Hollenbeck CM, Gold JR. dDocent: a RADseq, variant-calling pipeline designed for population genomics of non-model organisms. PeerJ. 2014 Jun 10;2:e431. doi: 10.7717/peerj.431. Epub 2014 Jun 10. PubMed PMID: 24949246; PubMed Central PMCID: PMC4060032.
-
 ## Pipeline tools

-- [BEDOPS](https://pubmed.ncbi.nlm.nih.gov/22576172/)
-  > Neph S, Kuehn MS, Reynolds AP, Haugen E, Thurman RE, Johnson AK, Rynes E, Maurano MT, Vierstra J, Thomas S, Sandstrom R. BEDOPS: high-performance genomic feature operations. Bioinformatics. 2012 Jul 15;28(14):1919-20. doi: 10.1093/bioinformatics/bts277. Epub 2012 May 9. PubMed PMID: 22576172. PubMed Central PMCID: PMC3389768.
-
-- [BEDTools](https://pubmed.ncbi.nlm.nih.gov/20110278/)
-
-  > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824.
-
-- [BCFTools](https://pubmed.ncbi.nlm.nih.gov/21903627/)
-
-  > Li H: A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics. 2011 Nov 1;27(21):2987-93. doi: 10.1093/bioinformatics/btr509. PubMed PMID: 21903627; PubMed Central PMCID: PMC3198575.
-
-- [BWA-MEM](https://arxiv.org/abs/1303.3997v2)
-
-  > Li H: Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. arXiv 2013. doi: 10.48550/arXiv.1303.3997
-
-* [CD-HIT](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3516142/)
-  > Fu L, Niu B, Zhu Z, Wu S, Li W. CD-HIT: accelerated for clustering the next-generation sequencing data. Bioinformatics. 2012 Dec 1;28(23):3150-2. doi: 10.1093/bioinformatics/bts565. Epub 2012 Oct 11. PubMed PMID: 23060610. PubMed Central PMCID: PMC3516142.
+- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)

-* [FastP](https://pubmed.ncbi.nlm.nih.gov/30423086/)
-  > Chen S, Zhou Y, Chen Y, Gu J. fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics. 2018 Sep 1;34(17):i884-90. doi: 10.1093/bioinformatics/bty560. PubMed PMID: PMC6129281. PubMed Central PMCID: 30423086.
+> Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online].

-* [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
+- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)

-- [FreeBayes](https://arxiv.org/abs/1207.3907)
+> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.

-  > Garrison E, Marth G. Haplotype-based variant detection from short-read sequencing. arXiv preprint arXiv:1207.3907 [q-bio.GN] 2012. doi: 10.48550/arXiv.1207.3907
-
-* [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
-  > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
-
-- [Rainbow](https://academic.oup.com/bioinformatics/article/28/21/2732/233132)
-  > Li YL, Xue DX, Zhang BD, Liu JX. An optimized approach for local de novo assembly of overlapping paired-end RAD reads from multiple individuals. Royal Society Open Science. 2018 Feb 28;5(2):171589. doi: 10.1098/rsos.171589. Epub 2018 Feb 28. PubMed PMID: 29515871. PubMed Central PMCID: PMC5830760.
-
-- [SAMtools](https://pubmed.ncbi.nlm.nih.gov/19505943/)
-
-  > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002.
-
-- [SeqKit](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5051824/)
-  > Shen W, Le S, Li Y, Hu F. SeqKit: a cross-platform and ultrafast toolkit for FASTA/Q file manipulation. PloS one. 2016 Oct 5;11(10):e0163962. doi: 10.1371/journal.pone.0163962. PubMed PMID: 27706213. PubMed Central PMCID: PMC5051824.
+## Software packaging/containerisation tools

-- [Tabix](https://academic.oup.com/bioinformatics/article/27/5/718/262743)
+- [Anaconda](https://anaconda.com)

-  > Li H, Tabix: fast retrieval of sequence features from generic TAB-delimited files, Bioinformatics, Volume 27, Issue 5, 1 March 2011, Pages 718–719, doi: 10.1093/bioinformatics/btq671. PubMed PMID: 21208982. PubMed Central PMCID: PMC3042176.
+  > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web.

-- [UMI-tools](https://pubmed.ncbi.nlm.nih.gov/28100584/)
+- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/)

-  > Smith T, Heger A, Sudbery I. UMI-tools: modeling sequencing errors in Unique Molecular Identifiers to improve quantification accuracy Genome Res. 2017 Mar;27(3):491-499. doi: 10.1101/gr.209601.116. Epub 2017 Jan 18. PubMed PMID: 28100584; PubMed Central PMCID: PMC5340976.
+  > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506.

-## Software packaging/containerisation tools
+- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/)

-* [Anaconda](https://anaconda.com)
-  > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web.
+  > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671.

-* [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/)
-  > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506.
+- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241)

-* [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/)
-  > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671.
+  > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241.

-* [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241)
-
-* [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/)
-  > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675.
+- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/)
+
+  > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675.
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052..c089ec7 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. -We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. -Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. 
+Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. -We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. -The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. - Communicating with community members within the nf-core Slack channel. - Participating in hackathons organised by nf-core (both online and in-person events). 
-- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. - Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. 
(Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. -- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course.
+ +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to the gather.town and Slack, either temporarily or permanently. 
+- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/README.md b/README.md index 657f7f3..a320c0c 100644 --- a/README.md +++ b/README.md @@ -1,86 +1,89 @@ -# ![nf-core/radseq](docs/images/nf-core-radseq_logo_light.png#gh-light-mode-only) ![nf-core/radseq](docs/images/nf-core-radseq_logo_dark.png#gh-dark-mode-only) - -[![GitHub Actions CI Status](https://github.com/nf-core/radseq/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/radseq/actions?query=workflow%3A%22nf-core+CI%22) -[![GitHub Actions Linting Status](https://github.com/nf-core/radseq/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/radseq/actions?query=workflow%3A%22nf-core+linting%22) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/radseq/results) -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) - -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg?labelColor=000000)](https://www.nextflow.io/) +

+<h1>
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="docs/images/nf-core-radseq_logo_dark.png">
+    <img alt="nf-core/radseq" src="docs/images/nf-core-radseq_logo_light.png">
+  </picture>
+</h1>
+ +[![GitHub Actions CI Status](https://github.com/nf-core/radseq/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/radseq/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/nf-core/radseq/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/radseq/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/radseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) + +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/radseq) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23radseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/radseq) -[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core) -[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23radseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/radseq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction -**nf-core/radseq** is a bioinformatics best-practice variant calling pipeline for RADseq data. +**nf-core/radseq** is a bioinformatics pipeline that ... -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! + - -On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. 
This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/radseq/results). - -## Pipeline summary + + 1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Adapter and quality trimming ([`fastp`](https://github.com/OpenGene/fastp)) -3. Choice of constructing psuedoreference (i) or using existing reference (ii) - - i. [`cdhit`](https://sites.google.com/view/cd-hit?pli=1) -> [`rainbow`](https://github.com/ChongLab/rainbow) - - ii. **No Preprossesing** -4. Choice of alignment software - - i. [`BWA`](https://bio-bwa.sourceforge.net/bwa.shtml) - - ii. [`BWAMEM2`](https://github.com/bwa-mem2/bwa-mem2) +2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) -5. UMI-based deduplicated ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools)) +## Usage -5. Index, merge and index alignments ([`SAMtools`](https://github.com/samtools/samtools)) +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. -6. Construct intervals for freebayes ([`BEDtools`](https://bedtools.readthedocs.io/en/latest/)) + -2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_ +Now, you can run the pipeline using: -3. Download the pipeline and test it on a minimal dataset with a single command: + - ```console - nextflow run nf-core/radseq -profile test,YOURPROFILE - ``` +```bash +nextflow run nf-core/radseq \ + -profile \ + --input samplesheet.csv \ + --outdir +``` - Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files). - > * The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`. - > * Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile ` in your command. 
This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. - > * If you are using `singularity` and are persistently observing issues downloading Singularity images directly due to timeout or network issues, then you can use the `--singularity_pull_docker_container` parameter to pull and convert the Docker image instead. Alternatively, you can use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. - > * If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs. +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/radseq/usage) and the [parameter documentation](https://nf-co.re/radseq/parameters). -4. Start running your own analysis! +## Pipeline output - ```console - nextflow run nf-core/radseq -profile --input samplesheet.csv - ``` +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/radseq/results) tab on the nf-core website pipeline page. +For more details about the output files and reports, please refer to the +[output documentation](https://nf-co.re/radseq/output). -## Documentation +## Credits -The nf-core/radseq pipeline comes with documentation about the pipeline [usage](https://nf-co.re/radseq/usage), [parameters](https://nf-co.re/radseq/parameters) and [output](https://nf-co.re/radseq/output). +nf-core/radseq was originally written by Gabriel Barrett. -## Credits +We thank the following people for their extensive assistance in the development of this pipeline: -dDocent was originally written by [Jon Puritz](https://github.com/jpuritz) and developed into nf-core/radseq by [Gabriel Barrett](https://github.com/Gabriel-A-Barrett) + ## Contributions and Support @@ -91,7 +94,9 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations - + + + An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 0000000..bfe6a96 --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/radseq v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. 
The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/email_template.html b/assets/email_template.html index 6cb8044..03b7e46 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -4,7 +4,7 @@ - + nf-core/radseq Pipeline Report @@ -12,7 +12,7 @@ -

-nf-core/radseq v${version}
+nf-core/radseq ${version}

Run Name: $runName

<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index 09ca2a7..ebfd92d 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,7 +4,7 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/radseq v${version} + nf-core/radseq ${version} ---------------------------------------------------- Run Name: $runName diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 0000000..8839c15 --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,29 @@ +id: "nf-core-radseq-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/radseq Methods Description" +section_href: "https://github.com/nf-core/radseq" +plot_type: "html" +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline +## You inject any metadata in the Nextflow '${workflow}' object +data: | +

+  <h4>Methods</h4>
+  <p>Data was processed using nf-core/radseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.</p>
+  <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:</p>
+  <pre><code>${workflow.commandLine}</code></pre>
+  <p>${tool_citations}</p>
+  <h4>References</h4>
+  <ul>
+    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820</li>
+    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x</li>
+    <li>Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7</li>
+    <li>da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192</li>
+    ${tool_bibliography}
+  </ul>
+  <div class="alert alert-info">
+    <h5>Notes:</h5>
+    <ul>
+      ${nodoi_text}
+      <li>The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!</li>
+      <li>You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.</li>
+    </ul>
+  </div>
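The `data:` block above is not static HTML: placeholders such as `${workflow.manifest.version}`, `${workflow.commandLine}`, `${doi_text}` and `${tool_citations}` are substituted at run time from Nextflow's `workflow` metadata before the fragment is handed to MultiQC. A minimal, self-contained sketch of that substitution step, assuming Groovy's standard `GStringTemplateEngine` and illustrative binding values (this is not the pipeline's actual helper code):

```groovy
import groovy.text.GStringTemplateEngine

// Stand-in for the 'data:' block of methods_description_template.yml;
// single quotes keep the ${...} placeholders un-interpolated until render time.
def methodsHtml = '''<h4>Methods</h4>
<p>Data was processed using nf-core/radseq v${workflow.manifest.version} ${doi_text}.</p>
<pre><code>${workflow.commandLine}</code></pre>'''

// Illustrative binding: in the pipeline, Nextflow's real 'workflow' object and
// citation strings assembled from the tools that actually ran are injected here.
def binding = [
    workflow: [
        manifest   : [version: '1.0.0'],
        commandLine: 'nextflow run nf-core/radseq -profile docker --input samplesheet.csv --outdir results'
    ],
    doi_text: ''  // filled with the Zenodo DOI once one is minted
]

def rendered = new GStringTemplateEngine()
        .createTemplate(methodsHtml)
        .make(binding)
        .toString()

println rendered  // HTML fragment embedded in the MultiQC report
```

The rendered fragment is then placed in the report by the `report_section_order` entry for `nf-core-radseq-methods-description` in the `multiqc_config.yml` shown below.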
diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml deleted file mode 100644 index d2e4caf..0000000 --- a/assets/multiqc_config.yaml +++ /dev/null @@ -1,11 +0,0 @@ -report_comment: > - This report has been generated by the nf-core/radseq - analysis pipeline. For information about how to interpret these results, please see the - documentation. -report_section_order: - software_versions: - order: -1000 - nf-core-radseq-summary: - order: -1001 - -export_plots: true diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml new file mode 100644 index 0000000..0f24d91 --- /dev/null +++ b/assets/multiqc_config.yml @@ -0,0 +1,15 @@ +report_comment: > + This report has been generated by the nf-core/radseq + analysis pipeline. For information about how to interpret these results, please see the + documentation. +report_section_order: + "nf-core-radseq-methods-description": + order: -1000 + software_versions: + order: -1001 + "nf-core-radseq-summary": + order: -1002 + +export_plots: true + +disable_version_detection: true diff --git a/assets/nf-core-radseq_logo_light.png b/assets/nf-core-radseq_logo_light.png index 706a3a9..f07dd2b 100644 Binary files a/assets/nf-core-radseq_logo_light.png and b/assets/nf-core-radseq_logo_light.png differ diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 5330c79..5f653ab 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,2 +1,3 @@ -sample,fastq_1,fastq_2,umi_barcodes,pop -SAMPLE_ID,/path/to/fastq/files/SAMPLE_ID.1.fq.gz,/path/to/fastq/files/SAMPLE_ID.2.fq.gz,FALSE,POP_ID +sample,fastq_1,fastq_2 +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, diff --git a/assets/schema_input.json b/assets/schema_input.json index 02ee32b..ff0345d 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/radseq/master/assets/schema_input.json", "title": "nf-core/radseq pipeline - params.input schema", "description": "Schema for the file provided with params.input", @@ -10,33 +10,24 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"] }, "fastq_1": { "type": "string", + "format": "file-path", + "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] - }, - "umi_barcodes": { - "errorMessage": "Boolean that describes if the read contains UMI barcodes must be binary 'TRUE' or 'FALSE'" + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" } }, - "required": [ - "sample", - "fastq_1" - ] + "required": ["sample", "fastq_1"] } } diff --git a/assets/slackreport.json b/assets/slackreport.json 
new file mode 100644 index 0000000..478cdec --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "nf-core/radseq ${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? ("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index 53acf07..0000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import errno -import argparse - - -def parse_args(args=None): - Description = "Reformat nf-core/radseq samplesheet file and check its contents." 
- Epilog = "Example usage: python check_samplesheet.py " - - parser = argparse.ArgumentParser(description=Description, epilog=Epilog) - parser.add_argument("FILE_IN", help="Input samplesheet file.") - parser.add_argument("FILE_OUT", help="Output file.") - return parser.parse_args(args) - - -def make_dir(path): - if len(path) > 0: - try: - os.makedirs(path) - except OSError as exception: - if exception.errno != errno.EEXIST: - raise exception - - -def print_error(error, context="Line", context_str=""): - error_str = "ERROR: Please check samplesheet -> {}".format(error) - if context != "" and context_str != "": - error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format( - error, context.strip(), context_str.strip() - ) - print(error_str) - sys.exit(1) - - -def check_samplesheet(file_in, file_out): - """ - This function checks that the samplesheet follows the following structure: - - sample,fastq_1,fastq_2,umi_barcodes - sample3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,false - sample4,AEG588A4_S4_L003_R1_001.fastq.gz,false - - For an example see: - https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - """ - - sample_mapping_dict = {} - with open(file_in, "r") as fin: - - ## Check header - MIN_COLS = 3 - # Update the column names for the input samplesheet - HEADER = ["sample", "fastq_1", "fastq_2","umi_barcodes"] - header = [x.strip('"') for x in fin.readline().strip().split(",")] - if header[: len(HEADER)] != HEADER: - print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER))) - sys.exit(1) - - ## Check sample entries - for line in fin: - lspl = [x.strip().strip('"') for x in line.strip().split(",")] - - # Check valid number of columns per row - if len(lspl) < len(HEADER): - print_error( - "Invalid number of columns (minimum = {})!".format(len(HEADER)), - "Line", - line, - ) - num_cols = len([x for x in lspl if x]) - if num_cols < MIN_COLS: - print_error( - "Invalid number of populated columns (minimum = {})!".format(MIN_COLS), - "Line", - line, - ) - - ## Check sample name entries - sample, fastq_1, fastq_2, umi_barcodes = lspl[: len(HEADER)] - sample = sample.replace(" ", "_") - if not sample: - print_error("Sample entry has not been specified!", "Line", line) - - ## Check FastQ file extension - for fastq in [fastq_1, fastq_2]: - if fastq: - if fastq.find(" ") != -1: - print_error("FastQ file contains spaces!", "Line", line) - if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"): - print_error( - "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!", - "Line", - line, - ) - - ## Auto-detect paired-end/single-end - sample_info = [] ## [single_end, fastq_1, fastq_2] - if sample and fastq_1 and fastq_2: ## Paired-end short reads - sample_info = ["0", fastq_1, fastq_2, umi_barcodes] - elif sample and fastq_1 and not fastq_2: ## Single-end short reads - sample_info = ["1", fastq_1, fastq_2, umi_barcodes] - else: - print_error("Invalid combination of columns provided!", "Line", line) - - ## Create sample mapping dictionary = { sample: [ single_end, fastq_1, fastq_2 ] } - if sample not in sample_mapping_dict: - sample_mapping_dict[sample] = [sample_info] - else: - if sample_info in sample_mapping_dict[sample]: - print_error("Samplesheet contains duplicate rows!", "Line", line) - else: - sample_mapping_dict[sample].append(sample_info) - - ## Write validated samplesheet with appropriate columns - if len(sample_mapping_dict) > 0: - 
out_dir = os.path.dirname(file_out) - make_dir(out_dir) - with open(file_out, "w") as fout: - fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2", "umi_barcodes"]) + "\n") - for sample in sorted(sample_mapping_dict.keys()): - - ## Check that multiple runs of the same sample are of the same datatype - if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): - print_error("Multiple runs of a sample must be of the same datatype!", "Sample: {}".format(sample)) - - for idx, val in enumerate(sample_mapping_dict[sample]): - fout.write(",".join(["{}_T{}".format(sample, idx + 1)] + val) + "\n") - else: - print_error("No entries to process!", "Samplesheet: {}".format(file_in)) - - -def main(args=None): - args = parse_args(args) - check_samplesheet(args.FILE_IN, args.FILE_OUT) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/bin/write_samplesheet.sh b/bin/write_samplesheet.sh deleted file mode 100644 index 58fffbf..0000000 --- a/bin/write_samplesheet.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/bash - - -echo "sample,fastq_1,fastq_2,umi_barcodes" > input.csv -paste -d',' <(for i in $(pwd)/data/demult/*.1.fq.gz; do basename $i | cut -f1 -d'.' -; done)\ - <(ls $(pwd)/data/demult/*.1.fq.gz) <(ls $(pwd)/data/demult/*.2.fq.gz)\ - <(for i in $(pwd)/data/demult/*.1.fq.gz; do if [[ "$i" =~ "Golden7".* ]]; then echo 'true'; else echo 'false'; fi; done)\ - >> input.csv \ No newline at end of file diff --git a/conf/base.config b/conf/base.config index c5a67a3..2d63d54 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,7 +1,7 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nf-core/radseq Nextflow base config file -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A 'blank slate' config file, appropriate for general use on most high performance compute environments. Assumes that all software is installed and available on the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. @@ -10,58 +10,47 @@ process { - cpus = { check_max( 3 * task.attempt, 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + // TODO nf-core: Check the defaults for all processes + cpus = { 1 * task.attempt } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } - errorStrategy = { task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish' } - maxRetries = 3 + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + maxRetries = 1 maxErrors = '-1' - cache = 'lenient' // Process-specific resource requirements - // NOTE - Please try and re-use the labels below as much as possible. + // NOTE - Please try and reuse the labels below as much as possible. // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. // If possible, it would be nice to keep the same label naming convention when // adding in your local modules too. + // TODO nf-core: Customise requirements for specific processes. 
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - withLabel:process_tiny { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 1.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } } withLabel:process_low { - cpus = { check_max( 5 * task.attempt, 'cpus' ) } - memory = { check_max( 5.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 2 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 4.h * task.attempt } } withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 15.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 8.h * task.attempt } } withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 55.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } - } - withLabel:process_super { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 100.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + cpus = { 12 * task.attempt } + memory = { 72.GB * task.attempt } + time = { 16.h * task.attempt } } withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } + time = { 20.h * task.attempt } } withLabel:process_high_memory { - memory = { check_max( 200.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } - cpus = 1 + memory = { 200.GB * task.attempt } } withLabel:error_ignore { errorStrategy = 'ignore' @@ -70,7 +59,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } } diff --git a/conf/igenomes.config b/conf/igenomes.config index 855948d..3f11437 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -1,7 +1,7 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Nextflow config file for iGenomes paths -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines reference genomes using iGenome paths. 
Can be used by any config that customises the base path using: $params.igenomes_base / --igenomes_base @@ -13,7 +13,7 @@ params { genomes { 'GRCh37' { fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" @@ -26,7 +26,7 @@ params { } 'GRCh38' { fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" @@ -36,9 +36,17 @@ params { macs_gsize = "2.7e9" blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" + } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" @@ -51,7 +59,7 @@ params { } 'TAIR10' { fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" @@ -62,7 +70,7 @@ params { } 'EB2' { fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" star = 
"${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" @@ -72,7 +80,7 @@ params { } 'UMD3.1' { fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" @@ -83,7 +91,7 @@ params { } 'WBcel235' { fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" @@ -94,7 +102,7 @@ params { } 'CanFam3.1' { fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" @@ -105,7 +113,7 @@ params { } 'GRCz10' { fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" @@ -115,7 +123,7 @@ params { } 'BDGP6' { fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" @@ -126,7 +134,7 @@ params { } 'EquCab2' { fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/genome.fa" + bwa = 
"${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" @@ -137,7 +145,7 @@ params { } 'EB1' { fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" @@ -147,7 +155,7 @@ params { } 'Galgal4' { fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" @@ -157,7 +165,7 @@ params { } 'Gm01' { fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" @@ -167,7 +175,7 @@ params { } 'Mmul_1' { fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" @@ -178,7 +186,7 @@ params { } 'IRGSP-1.0' { fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" @@ -188,7 +196,7 @@ params { } 'CHIMP2.1.4' { fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa 
= "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" @@ -199,7 +207,7 @@ params { } 'Rnor_5.0' { fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" @@ -209,7 +217,7 @@ params { } 'Rnor_6.0' { fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" @@ -219,7 +227,7 @@ params { } 'R64-1-1' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" @@ -230,7 +238,7 @@ params { } 'EF2' { fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" @@ -242,7 +250,7 @@ params { } 'Sbi1' { fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" bismark = 
"${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" @@ -252,7 +260,7 @@ params { } 'Sscrofa10.2' { fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" @@ -263,7 +271,7 @@ params { } 'AGPv3' { fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" @@ -273,7 +281,7 @@ params { } 'hg38' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" @@ -285,7 +293,7 @@ params { } 'hg19' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" @@ -298,7 +306,7 @@ params { } 'mm10' { fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" @@ -311,7 +319,7 @@ params { } 'bosTau8' { fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" @@ -321,7 +329,7 @@ params { } 'ce10' { fasta = 
"${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" @@ -333,7 +341,7 @@ params { } 'canFam3' { fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" @@ -344,7 +352,7 @@ params { } 'danRer10' { fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" @@ -355,7 +363,7 @@ params { } 'dm6' { fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" @@ -366,7 +374,7 @@ params { } 'equCab2' { fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" @@ -377,7 +385,7 @@ params { } 'galGal4' { fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" @@ -388,7 +396,7 @@ params { } 'panTro4' { fasta = 
"${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" @@ -399,7 +407,7 @@ params { } 'rn6' { fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" @@ -409,7 +417,7 @@ params { } 'sacCer3' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" @@ -419,7 +427,7 @@ params { } 'susScr3' { fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" diff --git a/conf/igenomes_ignored.config b/conf/igenomes_ignored.config new file mode 100644 index 0000000..b4034d8 --- /dev/null +++ b/conf/igenomes_ignored.config @@ -0,0 +1,9 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for iGenomes paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Empty genomes dictionary to use when igenomes is ignored. 
+---------------------------------------------------------------------------------------- +*/ + +params.genomes = [:] diff --git a/conf/modules.config b/conf/modules.config index e6a431a..d203d2b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1,12 +1,12 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Config file for defining DSL2 per module options and publishing paths -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.prefix = File name prefix for output files. + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. ---------------------------------------------------------------------------------------- */ @@ -14,517 +14,21 @@ process { publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: 'copy', + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: SAMPLESHEET_CHECK { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: FASTQC { ext.args = '--quiet' } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: 'copy', - pattern: '*_versions.yml' - ] - } - - withName: FASTP { - ext.args = { [ - params.dont_eval_duplicates ? '--dont_eval_duplication' : '', - params.cut_right ? '--cut_right' : '', - params.window_size ? "--cut_window_size ${params.window_size}" : '', - params.mean_min_quality ? "--cut_mean_quality ${params.mean_min_quality}" : '', - params.pairedend_bp_corr ? '--correction' : '', - params.overlap_dif_limit ? "--overlap_diff_limit ${params.overlap_dif_limit}" : '', - params.clip_r1 ? "--trim_front1 ${params.clip_r1}" : '', - params.clip_r2 ? "--trim_front2 ${params.clip_r2}" : '', - params.trim_polyg ? '--trim_poly_g' : '', - ].join(' ').trim() } - // umi specific arguments to fastp. see https://github.com/OpenGene/fastp - ext.umi_args = params.umi_read_structure ?: '' - publishDir = [ - [ - path: { "${params.outdir}/fastp" }, - mode: params.publish_dir_mode, - pattern: "*.html", - ], - [ - path: { "${params.outdir}/fastp" }, - mode: params.publish_dir_mode, - pattern: "*.log", - ], - [ - path: { "${params.outdir}/fastp" }, - mode: params.publish_dir_mode, - pattern: "*.fq.gz", - enabled: params.save_trimmed - ] - ] - } - - withName: PREPARE_FORWARD_READS { - publishDir = [ - path: { "${params.outdir}/${params.method}/reference/" }, - mode: params.publish_dir_mode, - pattern: "*.uniq.seqs", - enabled: params.denovo_intermediate_files - ] - } - - withName: COMBINE_UNIQUE_READS { + withName: 'MULTIQC' { + ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } publishDir = [ - path: { "${params.outdir}/${params.method}/reference/" }, + path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, - pattern: "*_uniq.full.fasta", - enabled: params.denovo_intermediate_files - ] - } - - withName: SEQTK_SEQ { - ext.args = '-F I' - publishDir = [ - path: { "${params.outdir}/${params.method}/reference/seqtk/" }, - mode: params.publish_dir_mode, - pattern: "*.seqtk-seq.*.gz", - enabled: params.denovo_intermediate_files - ] - } - - withName: DENOVO_FASTP { - ext.args = '' - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/reference/fastp/" }, - mode: params.publish_dir_mode, - pattern: "*.uniq.fasta", - enabled: params.denovo_intermediate_files - ], - [ - path: { "${params.outdir}/${params.method}/reference/fastp/" }, - mode: params.publish_dir_mode, - pattern: "*.totaluniqseq", - enabled: params.denovo_intermediate_files - ] - ] - } - - withName: CDHIT { - ext.args = { [ - params.cluster_algorithm ? "-g ${params.cluster_algorithm}" : '', - params.description_length ? "-d ${params.description_length}" : '', - params.sequence_simularity ? '-c ' + params.sequence_simularity : '' - ].join(' ').trim() } - - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/reference/cdhit" }, - mode: params.publish_dir_mode, - pattern: "*_cdhit.log", - enabled: params.denovo_intermediate_files - - ], - [ - path: { "${params.outdir}/${params.method}/reference/cdhit" }, - mode: params.publish_dir_mode, - pattern: "*.clstr", - enabled: params.denovo_intermediate_files - ] - ] - } - - withName: CDHIT_TO_RBDIV { - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/reference/cdhit_to_rbdiv" }, - mode: params.publish_dir_mode, - pattern: "*.sort.contig.cluster.ids", - enabled: params.denovo_intermediate_files - ], - [ - path: { "${params.outdir}/${params.method}/reference/cdhit_to_rbdiv" }, - mode: params.publish_dir_mode, - pattern: "*.contig.cluster.totaluniqseq", - enabled: params.denovo_intermediate_files - ], - [ - path: { "${params.outdir}/${params.method}/reference/cdhit_to_rbdiv" }, - mode: params.publish_dir_mode, - pattern: "*.rcluster", - enabled: params.denovo_intermediate_files - ] - ] - } - - withName: RAINBOW_DIV { - // recommended static arguments - ext.args = { [ - params.similarity_fraction ? "-f ${params.similarity_fraction}" : '', - params.max_variants ? "-K ${params.max_variants}" : '' - ].join(' ').trim() } - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/reference/rainbow_div" }, - mode: params.publish_dir_mode, - pattern: "*_rbdiv.out", - enabled: params.denovo_intermediate_files - ], - [ - path: { "${params.outdir}/${params.method}/reference/rainbow_div" }, - mode: params.publish_dir_mode, - pattern: "*.log", - enabled: params.denovo_intermediate_files - ] - ] - } - - withName: RAINBOW_MERGE { - // recommended static arguments - ext.args = { [ - params.min_reads ? "-r ${params.min_reads}" : '', - params.max_clusters_for_merge ? "-N${params.max_clusters_for_merge}" : '', - params.max_reads_for_assembly ? "-R${params.max_reads_for_assembly}" : '', - params.min_overlap ? "-l ${params.min_overlap}" : '', - params.min_similarity_fraction ? 
"-f ${params.min_similarity_fraction}" : '' - ].join(' ').trim() } - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/reference/rainbow_merge" }, - mode: params.publish_dir_mode, - pattern: "*_rbmerge.log", - enabled: params.denovo_intermediate_files - ], - [ - path: { "${params.outdir}/${params.method}/reference/rainbow_merge" }, - mode: params.publish_dir_mode, - pattern: "*_rbmerge.out", - enabled: params.denovo_intermediate_files - ] - ] - } - - withName: WRITE_FASTA { - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/reference/write_fasta" }, - mode: params.publish_dir_mode, - pattern: "*_rainbow.fasta" - ] - ] - } - - withName: SAMTOOLS_FAIDX { - ext.args = '' - publishDir = [ - path: { "${params.outdir}/${params.method}/reference/index" }, - mode: params.publish_dir_mode, - enabled: params.save_reference_indices, - pattern: "*.fai" - ] - } - - - withName: BWA_INDEX { - ext.args = '' - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/reference/index/" }, - mode: params.publish_dir_mode, - enabled: params.save_reference_indices, - pattern: "bwa/*" - ] - ] - } - - withName: BWA_MEM { - ext.args = { [ - // set default scores for alignments - params.clipping_penalty ? '-L ' + params.clipping_penalty : '', - params.output_secondary ? '-a' : '', - params.mark_short_as_sec ? '-M' : '', - params.min_aln_quality ? '-T ' + params.min_aln_quality : '', - params.matching_score ? '-A ' + params.matching_score : '', - params.mismatch_score ? '-B ' + params.mismatch_score : '', - params.gap_penalty ? '-O ' + params.gap_penalty : '' - ].join(' ').trim() } - - ext.args2 = { [ - '-S -h -u', - params.quality_score ? '-q ' + params.quality_score : '-q 1' - ].join(' ').trim() } - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/alignments/samtools_stats/" }, - mode: params.publish_dir_mode, - pattern: "*.bam" - ] - ] - } - - withName: BWAMEM2_MEM { - ext.args = { [ - // set default scores for alignments - params.clipping_penalty ? '-L ' + params.clipping_penalty : '', - params.output_secondary ? '-a' : '', - params.mark_short_as_sec ? '-M' : '', - params.min_aln_quality ? '-T ' + params.min_aln_quality : '', - params.matching_score ? '-A ' + params.matching_score : '', - params.mismatch_score ? '-B ' + params.mismatch_score : '', - params.gap_penalty ? '-O ' + params.gap_penalty : '' - ].join(' ').trim() } - - ext.args2 = { [ - '-S -h -u', - params.quality_score ? 
'-q ' + params.quality_score : '-q 1' - ].join(' ').trim() } - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/alignments/samtools_stats/" }, - mode: params.publish_dir_mode, - pattern: "*.bam" - ] - ] - } - - withName: UMITOOLS_DEDUP { - ext.args = '' - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/alignments/umitools_dedup/" }, - mode: params.publish_dir_mode, - pattern: "*.bam" - ], - [ - path: { "${params.outdir}/${params.method}/alignments/umitools_dedup/stats" }, - mode: params.publish_dir_mode, - pattern: "*.tsv" - ] - ] - } - - withName: SAMTOOLS_INDEX { - ext.args = '' - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/alignments/samtools_index" }, - mode: params.publish_dir_mode, - pattern: "*.bam" - ] - ] - } - - withName: SAMTOOLS_MERGE { - ext.args = '' - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/alignments/samtools_merge/" }, - mode: params.publish_dir_mode, - pattern: "*.bam" - ] - ] - } - - withName: SAMTOOLS_FLAGSTAT { - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/alignments/samtools_stats/" }, - mode: params.publish_dir_mode, - pattern: "*.flagstat" - ] - ] - } - - withName: SAMTOOLS_STATS { - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/alignments/samtools_stats/" }, - mode: params.publish_dir_mode, - pattern: "*.stats" - ] - ] - } - - withName: SAMTOOLS_IDXSTATS { - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/alignments/samtools_stats/" }, - mode: params.publish_dir_mode, - pattern: "*.idxstats" - ] - ] - } - - withName: BEDTOOLS_BAMTOBED { - ext.args = '' - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/alignments/intervals/bedtools_bamtobed/" }, - mode: params.publish_dir_mode, - pattern: "*.bed", - enabled: params.save_intervals - ] - ] - } - - withName: BEDOPS_MERGE_BED { - ext.args = '' - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/alignments/intervals/bedops_merge/" }, - mode: params.publish_dir_mode, - pattern: "*.bed", - enabled: params.save_intervals - ] - ] - } - - withName: BEDTOOLS_SORT { - ext.args = '' - ext.prefix = '' - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/alignments/intervals/bedtools_sort/" }, - mode: params.publish_dir_mode, - pattern: "*.bed", - enabled: params.save_intervals - ] - ] - } - - withName: BEDTOOLS_COVERAGE { - ext.args = '' - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/alignments/intervals/bedtools_coverage/" }, - mode: params.publish_dir_mode, - pattern: "*.cov", - enabled: params.save_intervals - ] - ] - } - - withName: BEDTOOLS_MERGE_COV { - ext.args = '' - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/alignments/intervals/bedtools_merge/" }, - mode: params.publish_dir_mode, - pattern: "*.cov", - enabled: params.save_intervals - ] - ] - } - - withName: BEDTOOLS_MAKEWINDOWS { - ext.args = '' - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/alignments/intervals/bedtools_makewindows/" }, - mode: params.publish_dir_mode, - pattern: "*.tab", - enabled: params.save_intervals - ], - [ - path: { "${params.outdir}/${params.method}/alignments/intervals/bedtools_makewindows/" }, - mode: params.publish_dir_mode, - pattern: "_cov.low.stats", - enabled: params.save_intervals - ], - [ - path: { "${params.outdir}/${params.method}/alignments/intervals/bedtools_makewindows/" }, - mode: params.publish_dir_mode, - pattern: "_cov.low.stats", - enabled: params.save_intervals - ] - ] - } - - withName: 
BEDTOOLS_INTERSECT { - ext.args = '' - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/alignments/intervals/bedtools_intersect/" }, - mode: params.publish_dir_mode, - pattern: "*.bed", - enabled: params.save_intervals - ] - ] - - } - - withName: CREATE_INTERVALS { - ext.args = '' - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/alignments/intervals/create_intervals/" }, - mode: params.publish_dir_mode, - pattern: "mapped.*.bed", - enabled: params.save_intervals - ] - ] - } - - withName: FREEBAYES { - ext.args = { [ - params.min_map_qual ? "-m ${params.min_map_qual}" : '', - params.min_base_qual ? "-q ${params.min_base_qual}" : '', - params.complex_gap ? "-E ${params.complex_gap}" : '', - params.use_best_n_alleles ? "-n ${params.use_best_n_alleles}" : '', - params.min_alt_fraction ? "-F ${params.min_alt_fraction}" : '', - params.min_repeat_entropy ? "--min-repeat-entropy ${params.min_repeat_entropy}" : '' - ].join(' ').trim() } - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/variant_calling/intervals" }, - mode: params.publish_dir_mode, - pattern: "*.vcf.gz", - enabled: params.save_freebayes_intervals - ] - ] - } - - withName: BCFTOOLS_SORT { - ext.args = '' - ext.prefix = "Golden_sort" - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/variant_calling/intervals/" }, - mode: params.publish_dir_mode, - pattern: "*.gz", - enabled: params.save_freebayes_intervals - ] - ] - } - - withName: TABIX_TABIX { - ext.args = '' - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/variant_calling/intervals/" }, - mode: params.publish_dir_mode, - pattern: "*.tbi", - enabled: params.save_freebayes_intervals - ] + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: BCFTOOLS_CONCAT { - ext.args = '--remove-duplicates --allow-overlaps' - publishDir = [ - [ - path: { "${params.outdir}/${params.method}/variant_calling/" }, - mode: params.publish_dir_mode, - pattern: "*.gz", - ] - ] - } } diff --git a/conf/test.config b/conf/test.config index 12c8d29..2192d51 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,36 +1,32 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Nextflow config file for running minimal tests -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/radseq -profile test, + nextflow run nf-core/radseq -profile test, --outdir ---------------------------------------------------------------------------------------- */ +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} + params { config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '2.GB' - max_time = '6.h' - - input = '/mnt/d/nextflow_testing/radseq/input.csv' - - // Method - method = 'denovo' - - aligner = 'minimap2' - - popmap = 'https://raw.githubusercontent.com/Gabriel-A-Barrett/test-datasets/radseq/testdata/ddRAD/popmap.txt' + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' // Genome references - genome = 'https://raw.githubusercontent.com/Gabriel-A-Barrett/test-datasets/radseq/testdata/ddRAD/CM014990.1.fna' - - umi_read_structure = '--umi --umi_loc=read2 --umi_len=8 --umi_skip=2 --umi_prefix=UMI' - + genome = 'R64-1-1' } diff --git a/conf/test_full.config b/conf/test_full.config index f34fb21..c83b8a1 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -1,11 +1,11 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Nextflow config file for running full-size tests -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines input files and everything required to run a full size pipeline test. Use as follows: - nextflow run nf-core/radseq -profile test_full, + nextflow run nf-core/radseq -profile test_full, --outdir ---------------------------------------------------------------------------------------- */ @@ -17,7 +17,7 @@ params { // Input data for full size test // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' // Genome references genome = 'R64-1-1' diff --git a/conf/xanadu.config b/conf/xanadu.config deleted file mode 100644 index 62d3744..0000000 --- a/conf/xanadu.config +++ /dev/null @@ -1,212 +0,0 @@ -params { - - // TODO nf-core: Specify your pipeline's command line flags - // Input options - input = "${baseDir}/data/input.csv" - - // 'denovo' or 'reference' - method = 'reference' - - // Denovo options - sequence_type = 'PE' - minReadDepth_WithinIndividual = null // defaults to 2 - minReadDepth_BetweenIndividual = null // defaults to 2 - - // Reference Genome options - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' - igenomes_ignore = false - - // Alignment options - aligner = 'bwa' - matching_score = null // defaults to 1 - mismatch_score = null // defaults to 4 - gap_penalty = null // defaults to 6 - quality_score = null // defaults to 1 - - // Intervals options - subset_intervals_channel = 300 - - - // Variant Calling options - splitByReadCoverage = '500000' - - // Intermediate files to put in the results dir. - save_trimmed = false - save_uniqseq = false - save_uniq_full_fasta = false - save_seqtk_seq_fasta = false - save_cdhit_clstr = false - save_trim_adapters_fastp = false - save_reference_fai = false - save_intervals = true - save_freebayes_intervals = false - - // MultiQC options - multiqc_config = null - multiqc_title = null - max_multiqc_email_size = '25.MB' - - // Boilerplate options - outdir = './results' - tracedir = "${params.outdir}/pipeline_info" - email = null - email_on_fail = null - publish_dir_mode = 'copy' - plaintext_email = false - monochrome_logs = false - help = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' - enable_conda = false - - // Config options - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null - config_profile_contact = null - config_profile_url = null - config_profile_name = null - - // Max resource options - // Defaults only, expecting to be overwritten - max_memory = '500.GB' - max_cpus = 50 - max_time = '240.h' - -} - -// Load base.config by default for all pipelines: contains -includeConfig 'base.config' - -// Load nf-core custom profiles from different Institutions -try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") -} - -profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } - conda { - params.enable_conda = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - - process.executor = 'slurm' - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - singularity.cacheDir = "${baseDir}/work" - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } -} - -// Load igenomes.config if required -if (!params.igenomes_ignore) { - includeConfig 'igenomes.config' -} else { - params.genomes = [:] -} - 
-// Export these variables to prevent local Python/R libraries from conflicting with those in the container -// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. -// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. - -env { - PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" - JULIA_DEPOT_PATH = "/usr/local/share/julia" - TMPDIR='/home/FCAM/gbarrett/TMP_DIR' - SINGULARITY_CACHEDIR='/home/FCAM/gbarrett/TMP_DIR' - //NXF_OPTS="-Xms1g -Xmx30g" can't set NXF variables within config must use export before script execution - _JAVA_OPTIONS="-Xms1g -Xmx30g" - -} - -// setup SLURM for xanadu -process { - executor = 'slurm' - withLabel:process_high_memory {clusterOptions = '--qos=himem --partition=himem'} - withLabel:'!process_high_memory' {clusterOptions = '--qos=general --partition=general'} -} -executor { - queueSize = 100 - submitRateLimit = '5 sec' -} - -// Capture exit codes from upstream processes when piping -process.shell = ['/bin/bash', '-euo', 'pipefail'] - -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') -timeline { - enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" -} -report { - enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" -} -trace { - enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" -} -dag { - enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" -} - -manifest { - name = 'nf-core/radseq' - author = 'Gabriel Barrett' - homePage = 'https://github.com/nf-core/radseq' - description = 'dDocent workflow' - mainScript = 'main.nf' - nextflowVersion = '!>=21.10.3' - version = '1.0dev' -} - -// Load modules.config for DSL2 module specific options -includeConfig 'modules.config' - -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" - return obj - } - } -} diff --git a/docs/images/mqc_fastqc_adapter.png b/docs/images/mqc_fastqc_adapter.png deleted file mode 100755 index 361d0e4..0000000 Binary files a/docs/images/mqc_fastqc_adapter.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_counts.png b/docs/images/mqc_fastqc_counts.png deleted file mode 100755 index cb39ebb..0000000 Binary files a/docs/images/mqc_fastqc_counts.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_quality.png b/docs/images/mqc_fastqc_quality.png deleted file mode 100755 index a4b89bf..0000000 Binary files a/docs/images/mqc_fastqc_quality.png and /dev/null differ diff --git a/docs/images/nf-core-radseq_logo_dark.png b/docs/images/nf-core-radseq_logo_dark.png index 2781cdb..40f7bc3 100644 Binary files a/docs/images/nf-core-radseq_logo_dark.png and b/docs/images/nf-core-radseq_logo_dark.png differ diff --git a/docs/images/nf-core-radseq_logo_light.png b/docs/images/nf-core-radseq_logo_light.png index 841b4bf..5200674 100644 Binary files a/docs/images/nf-core-radseq_logo_light.png and b/docs/images/nf-core-radseq_logo_light.png differ diff --git a/docs/output.md b/docs/output.md index 5032a92..6bb7229 100644 --- a/docs/output.md +++ b/docs/output.md @@ -6,247 +6,38 @@ This document describes the output produced by the pipeline. Most of the plots a The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. + + ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -- [Preprocessing](#preprocessing) - - [FastP](#fastp) - trim low-quality reads, umi-barcodes, adapters -- [Denovo Reference Construction](#denovo-reference-construction) - - [Prepare Forward Reads](#prepare-forward-reads) - combine forward and reverse sequences separated by 'NNNNNNNNNN' - - [Combine Uniqe Reads](#combine-uniq-reads) - retain reads present n number of times between and across individuals - - [Seqtk](#seqtk-seq) - write dummy fasta file - - [Denovo Fastp](#denovo-fastp) - trim adapters - - [CDHIT-est](#cdhit-est) - cluster similar sequences - - [cdhit_to_rbdiv](#cdhit-to-rbdiv) - convert cdhit file output into rainbow div input file format - - [Rainbow div](#rainbow-div) - distiguish sequence errors from heterozygote or variants between repetitive sequences - - [Rainbow merge](#rainbow-merge) - merge potential heterozygous clusters - - [write_fasta](#write_fasta) - convert rainbow merge file output into fasta format -- [Alignment](#alignment) - - [SAMtools](#samtools) - Sort, index and obtain alignment statistics - - [BWA](#bwa) - short read aligner - - [BWA-mem2](#bwa-mem2) - a faster short read aligner - - [UMI-tools dedup](#umi-tools-dedup) - UMI-based deduplication -- [Freebayes Intervals](#freebayes-intervals) - - [bedtools bamtobed](#bedtools-bamtobed): converts bam file into bed datastructure - - [bedops merge](#bedops-merge): merge indvidual bed files into a single bed file - - [bedtools sort](#bedtools-sort): sorts bed files - - [bedtools coverage](#bedtools-coverage): counts read depth - - [bedtools merge](#bedtools-merge): merges indv bed files and takes sum the read depths - - [bedtools makewindows](#bedtools-makewindows): split regions with coverage above `--max_read_coverage_to_split` to half read length - - [bedtools intersect](#bedtools-intersect): removes any overlapping regions between new bed file from `bedtools makewindows` and from 
`bedtools merge` - - [create intervals](#create-intervals): write regions for input to `freebayes` -- [Variant Calling](#variant-calling) - - [Freebayes](#freebayes) - a bayesian genotyper tool -- [Quality Control and Preprocessing](#qc-and-reporting) - - [FastQC](#fastqc) - Raw read QC - - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution - -# Directory Structure - -The default directory structure is as follows - -``` -{outdir} -├── denovo -│ ├── alignments -│ │ ├── samtools_index -│ │ ├── samtools_merge -│ │ ├── samtools_stats -│ │ └── umitools_dedup -│ │ └── stats -│ ├── reference -│ │ ├── cdhit -│ │ ├── cdhit_to_rbdiv -│ │ ├── rainbow_div -│ │ ├── rainbow_merge -│ │ └── write_fasta -│ └── variant_calling -├── fastp -├── fastqc -├── multiqc -│ └── multiqc_data -├── pipeline_info -└── reference - ├── alignments - │ ├── samtools_index - │ ├── samtools_merge - │ ├── samtools_stats - │ └── umitools_dedup - │ └── stats - └── variant_calling -``` - -# Preprocessing - -Radseq pre-processes reads prior to the alignment step. - -### FastP - -[FastP](https://github.com/OpenGene/fastp) is a tool designed to be an all-in-one preprocessor for FastQ files. You can enable the saving of trimmed fq files in output directory through `--save_trimmed=true`. - -
-Output files - -* `{outdir}/fastp/` - * `*_fastp.html`: Fastp report containing quality metrics. - * `*_fastp.log`: Log output containing statistics - * `*.fq.gz`: trimmed fq files - -
- -# Denovo Reference Construction - -Radseq supports the construction of psuedoreference using a conglomerate of open source tools. By default the resulting fasta file is only output to enable the output of intermediate files into `{outdir}/denovo/reference/` use `--denovo_intermediate_files=true`. - -### Prepare forward reads - -
-Output files - -* `{outdir}/denovo/reference` - * `*.uniq.seqs`: all unique sequences - -
- -### Seqtk seq - -
-Output files - -* `{outdir}/denovo/reference/seqtk` - * `*.seqtk-seq`: dummy fasta file - -
- -### Denovo FastP - -
-Output files - -* `{outdir}/denovo/reference/fastp` - * `*.uniq.fasta`: fasta format - * `*.totaluniqseq`: all unique sequences remaining after data cutoffs and adapter trimming - -
- -### Cdhit est - -
-Output files - -* `{outdir}/denovo/reference/cdhit` - * `*_cdhit.logs`: log output from cdhit-est - * `*.clstr`: clstr output used to convert into rainbow div format - -
- -### Cdhit to rbdiv - -
-Output files - -* `{outdir}/denovo/reference/cdhit_to_rbdiv` - * `*.sort.contig.cluster.ids`: file used for conversion of `cd-hit` to `Rainbow` input - * `*.contig.cluster.totaluniqseq`: used during assembly - * `*.rcluster`: input for `Rainbow` - -
- -### Rainbow div - -
-Output files - -* `{outdir}/denovo/reference/rainbow_div` - * `*_rbdiv.out`: rainbow div output file - * `*.log`: log file - -
- - -### Rainbow merge - -
-Output files - -* `{outdir}/denovo/reference/rainbow_div` - * `*_rainbow.fasta`: final fasta file used in subsequent processes - -
- -### Write fasta - -
-Output files - -* `{outdir}/denovo/reference/rainbow_div` - * `*_rbmerge.out`: rainbow merge output file - * `*_rbmerge.log`: log file - - -
- -# Freebayes Intervals - -# Alignment - -## Indices -enable the saving of reference indices with `--save_reference_indices true` generate from `samtools` and `bwa` for variant calling and short-read alignment respectiviely. - -### samtools faidx - -
-Output files - -* `{outdir}/denovo/reference/index` - * `*.fai`: samtools fai index - -
- -### bwa index - -
-Output files - -* `{outdir}/denovo/reference/index` - * `*_rbmerge.out`: rainbow merge output file - -
- -# Variant Calling +- [FastQC](#fastqc) - Raw read QC +- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline +- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution ### FastQC
Output files -* `fastqc/` - * `*_fastqc.html`: FastQC report containing quality metrics. - * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +- `fastqc/` + - `*_fastqc.html`: FastQC report containing quality metrics. + - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images.
[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). -![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) - -![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png) - -![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) - -> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. - ### MultiQC
Output files -* `multiqc/` - * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - * `multiqc_plots/`: directory containing static images from the report in various formats. +- `multiqc/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats.
@@ -259,10 +50,11 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
Output files -* `pipeline_info/` - * Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - * Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - * Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. +- `pipeline_info/` + - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. + - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`.
diff --git a/docs/usage.md b/docs/usage.md index c3a767b..0a3fecf 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,115 +6,160 @@ ## Introduction -radseq is a workflow designed to detect variants from restriction site-associated DNA sequences (RAD-seq). If a reference genome is available this workflow can be used on almost any kind of NGS data set. + -radseq is designed to call variants from species with or without a reference genome and can deduplicate reads based on unique moleculor identifier (umi) barcodes. +## Samplesheet input + +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. + +```bash +--input '[path to samplesheet file]' +``` + +### Multiple runs of the same sample + +The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: + +```csv title="samplesheet.csv" +sample,fastq_1,fastq_2 +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz +CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz +CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz +``` + +### Full samplesheet + +The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. + +A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. + +```csv title="samplesheet.csv" +sample,fastq_1,fastq_2 +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz +CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz +CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz +TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, +TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, +TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, +TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +``` + +| Column | Description | +| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | + +An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. 
## Running the pipeline The typical command for running the pipeline is as follows: -```console -nextflow run nf-core/radseq --input samplesheet.csv --genome GRCh37 -profile docker +```bash +nextflow run nf-core/radseq --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. Note that the pipeline will create the following files in your working directory: -```console -work # Directory containing the nextflow working files -results # Finished results (configurable, see below) -.nextflow_log # Log file from Nextflow +```bash +work # Directory containing the nextflow working files +<OUTDIR> # Finished results in specified location (defined with --outdir) +.nextflow_log # Log file from Nextflow # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` -## Samplesheet input +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use the parameter `--input` to specify its location. It has to be a comma-separated file with 4 columns, and a header row as shown in the examples below. +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file <file>`. -radseq does not handle duplicate samples in the input samplesheet. All samples must have a unique identifier. +> [!WARNING] +> Do not use `-c <file>` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). -### Full samplesheet +The above pipeline run specified with a params file in yaml format: -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. **Important** the pipeline will group together individuals based on shared characters up to the first number. Therefore it is important to start sample ID's with a shared character and start the unique identifier with a number. +```bash +nextflow run nf-core/radseq -profile docker -params-file params.yaml +``` -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. Files grouped together will have the prefix sample, like for example, the final vcf will be named sample.vcf.gz.
+with: -```console -sample,fastq_1,fastq_2,umi_barcodes,pop -sample1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,false,pop1 -sample2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz,false,pop1 -sample3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,false,pop1 -sample4,AEG588A4_S4_L003_R1_001.fastq.gz,false,pop2 -sample5,AEG588A5_S5_L003_R1_001.fastq.gz,false,pop2 -sample6,AEG588A6_S6_L003_R1_001.fastq.gz,false,pop2 +```yaml title="params.yaml" +input: './samplesheet.csv' +outdir: './results/' +genome: 'GRCh37' +<...> ``` -| Column | Description | -|----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `umi_barcodes` | Boolean variable (true/false) describing describing the presence a of unique moleculor identifier (umi) in the sample. -| `pop` | Designated population the sample belongs to. - -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: -```console +```bash nextflow pull nf-core/radseq ``` ### Reproducibility -It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. +It is a good idea to specify the pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. + +First, go to the [nf-core/radseq releases page](https://github.com/nf-core/radseq/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. -First, go to the [nf-core/radseq releases page](https://github.com/nf-core/radseq/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. 
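+
+For example, a hypothetical invocation pinning a release (the version number, input path and output directory below are purely illustrative):
+
+```bash
+nextflow run nf-core/radseq -r 1.3.1 -profile docker --input ./samplesheet.csv --outdir ./results
+```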
+This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. -This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. +To further assist in reproducibility, you can share and reuse [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. + +> [!TIP] +> If you wish to share such a profile (e.g. to upload as supplementary material for academic publications), make sure to NOT include cluster-specific paths to files, nor institution-specific profiles. ## Core Nextflow arguments -> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +> [!NOTE] +> These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). ### `-profile` Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. When using Biocontainers, most of these software packaging methods pull Docker containers from quay.io e.g [FastQC](https://quay.io/repository/biocontainers/fastqc) except for Singularity which directly downloads Singularity images via https hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/) and Conda which downloads and installs software locally from [Bioconda](https://bioconda.github.io/). +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. +> [!IMPORTANT] > We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to check if your system is supported, please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! They are loaded in sequence, so later profiles can overwrite earlier profiles. -If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended.
- -* `docker` - * A generic configuration profile to be used with [Docker](https://docker.com/) -* `singularity` - * A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) -* `podman` - * A generic configuration profile to be used with [Podman](https://podman.io/) -* `shifter` - * A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) -* `charliecloud` - * A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) -* `conda` - * A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. -* `test` - * A profile with a complete configuration for automated testing - * Includes links to test data so needs no other parameters +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment. + +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters +- `docker` + - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` + - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` + - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` + - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow `24.03.0-edge` or later). +- `conda` + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. ### `-resume` -Specify this when restarting a pipeline. Nextflow will used cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. @@ -126,90 +171,21 @@ Specify the path to a specific config file (this is a core Nextflow command). Se ### Resource requests -Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests.
@@ -126,90 +171,21 @@ Specify the path to a specific config file (this is a core Nextflow command). Se

### Resource requests

-Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped.
-
-For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue:
-
-```console
-[62/149eb0] NOTE: Process `RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1)
-Error executing process > 'RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)'
-
-Caused by:
- Process `RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137)
-
-Command executed:
- STAR \
- --genomeDir star \
- --readFilesIn WT_REP1_trimmed.fq.gz \
- --runThreadN 2 \
- --outFileNamePrefix WT_REP1. \
-
-
-Command exit status:
- 137
-
-Command output:
- (empty)
+Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the pipeline steps, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher resource requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped.
-
-Command error:
- .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1.
-Work dir:
- /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb
+To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website.
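+As a minimal sketch of such a request (the process name and memory value below are illustrative assumptions, not this pipeline's defaults), a custom config passed via `-c custom.config` could look like:
+
+```nextflow
+process {
+    // Raise the memory request for a single named process
+    withName: 'FREEBAYES' {
+        memory = 64.GB
+    }
+}
+```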
-
-Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run`
-```
-
-To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so based on the search results the file we want is `modules/nf-core/software/star/align/main.nf`. If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. Providing you haven't set any other standard nf-core parameters to __cap__ the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least 72GB of memory, in this case increased to 100GB. The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections.
-
-```nextflow
-process {
-    withName: STAR_ALIGN {
-        memory = 100.GB
-    }
-}
-```
-
-> **NB:** We specify just the process name i.e. `STAR_ALIGN` in the config file and not the full task name string that is printed to screen in the error message or on the terminal whilst the pipeline is running i.e. `RNASEQ:ALIGN_STAR:STAR_ALIGN`. You may get a warning suggesting that the process selector isn't recognised but you can ignore that if the process name has been specified correctly. This is something that needs to be fixed upstream in core Nextflow.
-
-### Updating containers
-
-The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`.
-
-1. Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19)
-2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags)
-3. Create the custom config accordingly:
+### Custom Containers
- * For Docker:
+In some cases, you may wish to change the container or conda environment used by a pipeline step for a particular tool. By default, nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However, in some cases the pipeline-specified version may be out of date.
-
-    ```nextflow
-    process {
-        withName: PANGOLIN {
-            container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0'
-        }
-    }
-    ```
+To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website.
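+As an illustrative sketch (the process name and container tag below are assumptions, not this pipeline's pinned defaults), an override supplied via `-c custom.config` could look like:
+
+```nextflow
+process {
+    // Swap in a different biocontainers image for one process
+    withName: 'FASTQC' {
+        container = 'quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0'
+    }
+}
+```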
- * For Singularity:
+### Custom Tool Arguments
-
-    ```nextflow
-    process {
-        withName: PANGOLIN {
-            container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0'
-        }
-    }
-    ```
+A pipeline might not always support every possible argument or option of a particular tool used in the pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default.
- * For Conda:
-
-    ```nextflow
-    process {
-        withName: PANGOLIN {
-            conda = 'bioconda::pangolin=3.0.5'
-        }
-    }
-    ```
-
-> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch.
+To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website.

### nf-core/configs

@@ -233,45 +209,6 @@ Some HPC setups also allow you to run nextflow within a cluster job submitted yo

In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`):

-```console
+```bash
NXF_OPTS='-Xms1g -Xmx4g'
```
-
-## How to handle UMIs
-
-radseq simultaneously processes UMI-reads and filters low quality reads using [fastp](https://github.com/OpenGene/fastp).
-
-In order to remove UMI tags you must provide additional information to `--umi_read_structure [structure]` in your parameters.
-
-This will enable deduplication of bam files prior to variant calling and the calculation of alignment statistics.
-
-## How to run in reference or denovo modes?
-
-To run the workflow with no reference genome you must specify `--method 'denovo'` in your parameters or `--method 'reference'` in case of the latter.
-
-## Lost in parameter space?
-
-### Denovo parameters
-
-For psuedo-reference construction radseq follows dDocent [paper](https://peerj.com/articles/431/), [GitHub](https://github.com/jpuritz/dDocent)
-
-`--sequence_type` : An acronym describing the type of sequencing method used. Avaiable options include `SE`, `PE`, `RPE`, `OL`, `ROL`
-
-`--minReadDepth_WithinIndividual` : minimum number of reads within an individual to include in psuedo-reference construction
-
-`--minReadDepth_BetweenIndividual` : minimum number of reads across individuals to include in psuedo-reference construction
-
-### Alignment parameters
-
-You can adjust the aligner in the parameters `--aligner` : [`'bwa'`,`'bwa2'`], radseq currently supports bwa mem and bwa mem2.
-
-### What does the bam_intervals_bedtools.nf subworkflow do?
-
-This subworkflow creates intervals to be passed into FreeBayes for parallel execution on regions determined based on read coverage.
-
-The threshold `--splitByReadCoverage` determines the amount of read depth to split an interval into smaller, 1/2 read-length sized intervals with a default of `500000`.
-
-**Warning** For large sample size analysis or large fastq files, it's recommended to randomly subset bam file input into subworkflow by passing `--subset_intervals_channel [integer]` into parameters.
- -## Variant calling parameters - diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index 33cd4f6..0000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,528 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. -// - -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - System.exit(1) - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = 
group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != 
"" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... 
- // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 25a0a74..0000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,336 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Generate version string - // - public static String version(workflow) { - String version_string = "" - - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = NfcoreTemplate.version(workflow) - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // 
Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Construct and send a notification to a web server as JSON - // e.g. 
Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? 
'' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? 
'' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100755 index 1b88aec..0000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,40 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - def required_channels = ['conda-forge', 'bioconda', 'defaults'] - def conda_check_failed = !required_channels.every { ch -> ch in channels } - - // Check that they are in the right order - conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) - conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) - - if (conda_check_failed) { - log.warn "=============================================================================\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + - " NB: The order of the channels matters!\n" + - "===================================================================================" - } - } -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index baf2669..0000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,94 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the nf-core/radseq pipeline -// - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - // TODO nf-core: Add Zenodo DOI for pipeline after first release - //"* The pipeline\n" + - //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - // - // Print help to screen if required - // - 
public static String help(workflow, params, log) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Print parameter summary log to screen - // - public static String paramsSummaryLog(workflow, params, log) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params, log) - System.exit(0) - } - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params, log) - - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - - // Check that conda channels are set-up correctly - if (params.enable_conda) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" - System.exit(1) - } - } - - // - // Get attribute from genome config file e.g. fasta - // - public static String getGenomeAttribute(params, attribute) { - def val = '' - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - val = params.genomes[ params.genome ][ attribute ] - } - } - return val - } -} diff --git a/lib/WorkflowRadseq.groovy b/lib/WorkflowRadseq.groovy deleted file mode 100755 index 61bd95d..0000000 --- a/lib/WorkflowRadseq.groovy +++ /dev/null @@ -1,77 +0,0 @@ -// -// This file holds several functions specific to the workflow/radseq.nf in the nf-core/radseq pipeline -// - -class WorkflowRadseq { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - //genomeExistsError(params, log) // function below - - if (!params.method) { - log.error "type of workflow to execute not specified with e.g. '--method denovo' or via a detectable config file." - System.exit(1) - } - if (params.method == 'reference') { - if (!params.genome || params.genome == null) { - log.error "need to specify a genome file with e.g. '--genome fasta' or via a detectable config file." - System.exit(1) - } - } - if (params.method == 'denovo'){ - if (!params.sequence_type) { - log.error "need to specify the sequencing method with e.g. 
'--sequence_type' or via a detectable config file" - System.exit(1) - } - if (!params.minreaddepth_withinindividual || params.minreaddepth_withinindividual == null) { - log.warn("using default range of values for minReadDepth_withinIndividual") - } - if (params.method == 'denovo' && !params.minreaddepth_betweenindividual || params.minreaddepth_betweenindividual == null) { - log.warn("using default range of values for minReadDepth_BetweenIndividual") - } - } - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "    <p style=\"font-size:110%\"><b>$group</b></p>\n" - summary_section += "    <dl class=\"dl-horizontal\">\n" - for (param in group_params.keySet()) { - summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>\n" - }
 - summary_section += "    </dl>
\n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - // - // Exit pipeline if incorrect --genome key provided - // - private static void genomeExistsError(params, log) { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - log.error "=============================================================================\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "===================================================================================" - System.exit(1) - } - } -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb..0000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index 8883085..a15f562 100644 --- a/main.nf +++ b/main.nf @@ -1,63 +1,104 @@ #!/usr/bin/env nextflow /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nf-core/radseq -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/radseq Website: https://nf-co.re/radseq Slack : https://nfcore.slack.com/channels/radseq ---------------------------------------------------------------------------------------- */ -nextflow.enable.dsl = 2 - /* -======================================================================================== - GENOME PARAMETER VALUES -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -//params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +include { RADSEQ } from './workflows/radseq' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_radseq_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_radseq_pipeline' +include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_radseq_pipeline' /* -======================================================================================== - VALIDATE & PRINT PARAMETER SUMMARY -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + GENOME PARAMETER VALUES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -WorkflowMain.initialise(workflow, params, log) +// TODO nf-core: Remove this line if you don't need a FASTA file +// This is an example of how to use getGenomeAttribute() to fetch parameters +// from 
igenomes.config using `--genome` +params.fasta = getGenomeAttribute('fasta') /* -======================================================================================== - NAMED WORKFLOW FOR PIPELINE -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOWS FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { RADSEQ } from './workflows/radseq' - // -// WORKFLOW: Run main nf-core/radseq analysis pipeline +// WORKFLOW: Run main analysis pipeline depending on type of input // workflow NFCORE_RADSEQ { - RADSEQ () -} + take: + samplesheet // channel: samplesheet read in from --input + + main: + + // + // WORKFLOW: Run pipeline + // + RADSEQ ( + samplesheet + ) + emit: + multiqc_report = RADSEQ.out.multiqc_report // channel: /path/to/multiqc_report.html +} /* -======================================================================================== - RUN ALL WORKFLOWS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { - NFCORE_RADSEQ () + + main: + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + params.input + ) + + // + // WORKFLOW: Run main workflow + // + NFCORE_RADSEQ ( + PIPELINE_INITIALISATION.out.samplesheet + ) + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + NFCORE_RADSEQ.out.multiqc_report + ) } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/modules.json b/modules.json index aaf1605..f95950c 100644 --- a/modules.json +++ b/modules.json @@ -5,165 +5,37 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { - "bcftools/concat": { - "branch": "master", - "git_sha": "582ff1755bdd205c65e2ba4c31e0a008dae299ec", - "installed_by": ["modules"] - }, - "bcftools/sort": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], - "patch": "modules/nf-core/bcftools/sort/bcftools-sort.diff" - }, - "bedtools/bamtobed": { - "branch": "master", - "git_sha": "1d48427957205cb6acf1ffe330bd35b6bb8baa90", - "installed_by": ["modules"] - }, - "bedtools/coverage": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], - "patch": "modules/nf-core/bedtools/coverage/bedtools-coverage.diff" - }, - "bedtools/intersect": { - "branch": "master", - "git_sha": "e576ba52f6babb06cd7946c286e5bb4f494ee02c", - "installed_by": ["modules"] - }, - "bedtools/makewindows": { - "branch": "master", - "git_sha": 
"c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], - "patch": "modules/nf-core/bedtools/makewindows/bedtools-makewindows.diff" - }, - "bedtools/merge": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], - "patch": "modules/nf-core/bedtools/merge/bedtools-merge.diff" - }, - "bedtools/sort": { - "branch": "master", - "git_sha": "f1f473b21811b958d1317c4a97c56e16d3ee40f9", - "installed_by": ["modules"], - "patch": "modules/nf-core/bedtools/sort/bedtools-sort.diff" - }, - "bwa/index": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] - }, - "bwa/mem": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], - "patch": "modules/nf-core/bwa/mem/bwa-mem.diff" - }, - "bwamem2/index": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] - }, - "bwamem2/mem": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], - "patch": "modules/nf-core/bwamem2/mem/bwamem2-mem.diff" - }, - "cdhit/cdhit": { - "branch": "master", - "git_sha": "4a29d43b13830df333168cecf634c102eafb2556", - "installed_by": ["modules"], - "patch": "modules/nf-core/cdhit/cdhit/cdhit-cdhit.diff" - }, - "custom/dumpsoftwareversions": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] - }, - "fastp": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], - "patch": "modules/nf-core/fastp/fastp.diff" - }, "fastqc": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] - }, - "freebayes": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], - "patch": "modules/nf-core/freebayes/freebayes.diff" - }, - "minimap2/align": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] - }, - "minimap2/index": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "dc94b6ee04a05ddb9f7ae050712ff30a13149164", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba", - "installed_by": ["modules"] - }, - "samtools/faidx": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] - }, - "samtools/flagstat": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] - }, - "samtools/idxstats": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", "installed_by": ["modules"] - }, - "samtools/index": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] - }, - "samtools/merge": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] - }, - "samtools/stats": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] - }, - "seqtk/seq": { + } + } + }, + "subworkflows": { + "nf-core": { + "utils_nextflow_pipeline": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + 
"git_sha": "c2b22d85f30a706a3073387f30380704fcae013b", + "installed_by": ["subworkflows"] }, - "tabix/tabix": { + "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "git_sha": "51ae5406a030d4da1e49e4dab49756844fdd6c7a", + "installed_by": ["subworkflows"] }, - "umitools/dedup": { + "utils_nfschema_plugin": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"], - "patch": "modules/nf-core/umitools/dedup/umitools-dedup.diff" + "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", + "installed_by": ["subworkflows"] } } - }, - "subworkflows": {} + } } } } diff --git a/modules/local/bedops/merge/main.nf b/modules/local/bedops/merge/main.nf deleted file mode 100644 index b534d97..0000000 --- a/modules/local/bedops/merge/main.nf +++ /dev/null @@ -1,33 +0,0 @@ -process BEDOPS_MERGE_BED { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? "bioconda::bedops=2.4.41" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedops:2.4.41--h9f5acd7_0' : - 'quay.io/biocontainers/bedops:2.4.41--h9f5acd7_0' }" - - input: - tuple val(meta), path(bed) - - output: - tuple val(meta), path('*.bed'), emit: bed - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - if ("$bed" == "${prefix}.bed") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - """ - bedops --merge ${bed.collect().join(" ")} \\ - ${args} \\ - > ${prefix}.bed - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedops: \$(bedops --version | awk '/version:/{print \$2}') - END_VERSIONS - """ -} \ No newline at end of file diff --git a/modules/local/bedops/merge/meta.yml b/modules/local/bedops/merge/meta.yml deleted file mode 100644 index e69de29..0000000 diff --git a/modules/local/cdhit_to_rbdiv.nf b/modules/local/cdhit_to_rbdiv.nf deleted file mode 100644 index 8b936a0..0000000 --- a/modules/local/cdhit_to_rbdiv.nf +++ /dev/null @@ -1,67 +0,0 @@ -process CDHIT_TO_RBDIV { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::coreutils=8.25" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/coreutils:8.25--0' : - 'quay.io/biocontainers/coreutils:8.25--0' }" - - input: - tuple val(meta) , path(clstr) - tuple val(meta2), path(totaluniqseq) - val (type) - - output: - tuple val(meta), path('*.rclstr') , emit: rbcluster - tuple val (meta), path ('*.contig.cluster.totaluniqseq'), emit: clstr_totaluniqseq - path 'versions.yml' , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - if (type == 'PE'){ - """ - awk '{if (\$1 ~ /Cl/) clus = clus + 1; else print \$3 "\\t" clus}' ${clstr} | \\ - sed -e 's/[>dDocent_Contig_,...]//g' | \\ - sort -g -k1 -S 2G > ${prefix}.sort.contig.cluster.ids - - paste ${prefix}.sort.contig.cluster.ids ${totaluniqseq} > ${prefix}.contig.cluster.totaluniqseq - - # cd-hit TO rainbow cluster format - sort -k2,2 -g ${prefix}.contig.cluster.totaluniqseq | \\ - sed -e 's/NNNNNNNNNN/ /g' > ${prefix}.rclstr - cat <<-END_VERSIONS > versions.yml - "${task.process}": - BusyBox: \$(busybox | sed -n -E 's/.*v([[:digit:].]+)\\s\\(.*/\\1/p') - GNU: \$(sort --version | awk 'NR==1{print \$4}') - END_VERSIONS - """ - } else { - """ - awk '{if (\$1 ~ /Cl/) clus = clus + 1; else print \$3 "\\t" clus}' ${clstr} | \\ - sed -e 's/[>dDocent_Contig_,...]//g' | \\ - sort -g -k1 > ${prefix}.sort.contig.cluster.ids - - paste ${prefix}.sort.contig.cluster.ids <(awk '!/>/' ${totaluniqseq}) > ${prefix}.contig.cluster.Funiq - - sed -e 's/NNNNNNNNNN/ /g' ${totaluniqseq} | \\ - sort -k1 -S 2G | \\ - awk '{print \$0 "\\t" NR}' > ${prefix}.totaluniqseq.CN - - join -t \$'\\t' -1 3 -2 1 ${prefix}.contig.cluster.Funiq ${prefix}.totaluniqseq.CN -o 2.3,1.2,2.1,2.2 > ${prefix}.contig.cluster.totaluniqseq - - # cd-hit TO rainbow cluster format - sort -k2,2 -g ${prefix}.contig.cluster.totaluniqseq -S 2G | \\ - sed -e 's/NNNNNNNNNN/ /g' > ${prefix}..rclstr - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - BusyBox: \$(busybox | sed -n -E 's/.*v([[:digit:].]+)\\s\\(.*/\\1/p') - GNU: \$(sort --version | awk 'NR==1{print \$4}') - END_VERSIONS - """ - } -} \ No newline at end of file diff --git a/modules/local/combine_uniq_forward_reads.nf b/modules/local/combine_uniq_forward_reads.nf deleted file mode 100644 index 564dea4..0000000 --- a/modules/local/combine_uniq_forward_reads.nf +++ /dev/null @@ -1,80 +0,0 @@ -process COMBINE_UNIQUE_READS { - tag "${meta.id}" - label 'process_medium' - - // get a can't find conda dir. ? Check you have anaconda3 installed - conda (params.enable_conda ? 'bioconda::perl-sys-info-driver-linux=0.7905' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/perl-sys-info-driver-linux:0.7905--pl5321hdfd78af_1' : - 'quay.io/upennlibraries/perl_apache' }" - - input: - tuple val (meta), path (reads) // loading all individual uniq sequence per collected - val (type) // sequencing technology used. 
Changes how unique sequences are identified - each withinIndv_MinDepth // within_individual - each acrossIndv_MinDepth // number of unique individuals w/ reads - - output: - tuple val (meta), path ('*_uniq.full.fasta'), emit: uniq_reads - tuple val (meta), path ('totaluniqseq') , emit: totaluniqseq - path 'versions.yml' , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - if (type == 'RPE' || type == 'ROL') { - """ - awk -v x="${withinIndv_MinDepth}" '(\$1 >= x)' *.uniq.seqs | \\ - cut -f2 | \\ - sed -e 's/NNNNNNNNNN/-/' > total.uniqs - - cut -f 1 -d "-" total.uniqs > total.u.F - cut -f 2 -d "-" total.uniqs > total.u.R - - paste total.u.F total.u.R | \\ - sort -k1 -S 2G > total.fr - - awk -v x=${withinIndv_MinDepth} '\$1 >= x' *.uniq.seqs | \\ - cut -f2 | \\ - sed -e 's/NNNNNNNNNN/ /g' | \\ - cut -f1 | \\ - uniq | \\ - sort -S 2G | \\ - uniq -c > total.f.uniq - - join -1 2 -2 1 -o 1.1,1.2,2.2 total.f.uniq total.fr | \\ - awk '{print \$1 "\t" \$2 "NNNNNNNNNN" \$3}' | \\ - awk -v x=${acrossIndv_MinDepth} '\$1 >= x' > uniq.k.${withinIndv_MinDepth}.c.${acrossIndv_MinDepth}.seqs - - sort -k1 -r -n -S 2G uniq.k.${withinIndv_MinDepth}.c.${acrossIndv_MinDepth}.seqs | \\ - cut -f2 > totaluniqseq - awk '{c= c + 1; print ">dDocent_Contig_" c "\\n" \$1}' totaluniqseq > ${prefix}_uniq.full.fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - BusyBox: \$(busybox | sed -n -E 's/.*v([[:digit:].]+)\\s\\(.*/\\1/p') - END_VERSIONS - """ - } else { - """ - awk -v x=${withinIndv_MinDepth} '(\$1 >= x)' *.uniq.seqs | \\ - cut -f2 | \\ - perl -e 'while (<>) {chomp; \$z{\$_}++;} while((\$k,\$v) = each(%z)) {print "\$v\\t\$k\\n";}' | \\ - awk -v x=${acrossIndv_MinDepth} '(\$1 >= x)' > uniq.k.${withinIndv_MinDepth}.c.${acrossIndv_MinDepth}.seqs - - # order the sequences for reproducibility - sort -k1 -r -n -S 2G uniq.k.${withinIndv_MinDepth}.c.${acrossIndv_MinDepth}.seqs | \\ - cut -f2 > totaluniqseq - - awk '{c= c + 1; print ">dDocent_Contig_" c "\\n" \$1}' totaluniqseq > ${prefix}_uniq.full.fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - BusyBox: \$(busybox | sed -n -E 's/.*v([[:digit:].]+)\\s\\(.*/\\1/p') - perl: \$(perl --version | sed -n -E '/^This is/ s/.*\\(v([[:digit:].]+)\\).*/\\1/p') - END_VERSIONS - """ - } -} \ No newline at end of file diff --git a/modules/local/create_intervals.nf b/modules/local/create_intervals.nf deleted file mode 100644 index 824fe75..0000000 --- a/modules/local/create_intervals.nf +++ /dev/null @@ -1,74 +0,0 @@ -process CREATE_INTERVALS { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::perl=5.26.2" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
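The deleted COMBINE_UNIQUE_READS applies two cutoffs via awk: a sequence must be seen at least `withinIndv_MinDepth` times inside a single individual, and must then survive in at least `acrossIndv_MinDepth` individuals. A toy Groovy model of that double filter (cutoffs and sequences invented for illustration):

```nextflow
def withinIndv = 4 // assumption: example within-individual depth cutoff
def acrossIndv = 2 // assumption: example across-individual count cutoff

// per-individual occurrence counts, standing in for the *.uniq.seqs files
def perIndivCounts = [
    indv1: [ACGT: 5, TTTT: 1],
    indv2: [ACGT: 7, GGGG: 9],
    indv3: [GGGG: 2],
]

def kept = perIndivCounts.values()
    .collectMany { counts -> counts.findAll { it.value >= withinIndv }*.key } // awk '$1 >= x'
    .countBy { it }                                                           // uniq -c
    .findAll { it.value >= acrossIndv }                                       // awk '$1 >= x' again
    .keySet()

println kept // [ACGT]: deep enough in indv1 and indv2; GGGG passes the depth cutoff only in indv2
```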
- 'https://depot.galaxyproject.org/singularity/perl:5.26.2' : - 'quay.io/biocontainers/perl:5.26.2' }" - - input: - tuple val(meta), path(cov), path(intersect), path(low_cov) - val (lengths) - - output: - tuple val(meta), path('mapped.*.bed'), emit: intervals - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "$meta.id" - if (params.method == 'denovo') { - """ - cat ${intersect} ${low_cov} > ${prefix}_cov.split.stats - echo "${lengths.join("\n")}" > ${prefix}_lengths.txt - MaxLen=\$(awk '{ print length() | "sort -rn" }' ${prefix}_lengths.txt| head -1) - MaxLen2=\$(( \$MaxLen / 2 )) - - TT=\$(( \$MaxLen2 * 1000000 )) - DP=\$(awk '{print \$4}' ${cov} | sort -rn | perl -e '\$d=.001;@l=<>;print \$l[int(\$d*@l)]') - CC=\$( awk -v x=\$DP '\$4 < x' ${cov} | awk '{len=\$3-\$2;lc=len*\$4;tl=tl+lc} END {OFMT = "%.0f";print tl/"'${task.cpus}'"}') - - awk -v x=\$DP '\$4 < x' ${prefix}_cov.split.stats | sort -k1,1 -k2,2 | awk -v cutoff=\$CC -v tt=\$TT 'BEGIN{i=1} - {len=\$3-\$2;lc=len*\$4;cov = cov + lc - if (NR == 1 && lc > tt) {x="mapped."i".bed";print \$1"\\t"\$2"\\t"\$3 > x; i=i+1; e=1} - else if ( cov < cutoff && lc < tt) {x="mapped."i".bed";print \$1"\\t"\$2"\\t"\$3 > x; e=0} - else if (lc > tt && e > 0 ) {x="mapped."i".bed"; print \$1"\\t"\$2"\\t"\$3 > x; cov=0;i=i+1; e=1} - else if (lc > tt && e < 1 ) {i=i+1; x="mapped."i".bed"; print \$1"\\t"\$2"\\t"\$3 > x; cov=0;i=i+1;e=1} - else if (cov > cutoff && lc < tt ) {i=i+1; x="mapped."i".bed"; print \$1"\\t"\$2"\\t"\$3 > x; cov=lc;e=0} - }' - cat <<-END_VERSIONS > versions.yml - "${task.process}": - BusyBox: \$(busybox | sed -n -E 's/.*v([[:digit:].]+)\\s\\(.*/\\1/p') - perl: \$(perl --version | sed -n -E '/^This is/ s/.*\\(v([[:digit:].]+)\\).*/\\1/p') - END_VERSIONS - """ - } else { - """ - cat ${intersect} ${low_cov} > ${prefix}_cov.split.stats - echo "${lengths.join("\n")}" > ${prefix}_lengths.txt - MaxLen=\$(awk '{ print length() | "sort -rn" }' ${prefix}_lengths.txt| head -1) - MaxLen2=\$(( \$MaxLen / 2 )) - - TT=\$(( \$MaxLen2 * 1000000 )) - DP=\$(awk '{print \$4}' ${cov} | sort -rn | perl -e '\$d=.00005;@l=<>;print \$l[int(\$d*@l)]') - CC=\$( awk -v x=\$DP '\$4 < x' ${cov} | awk '{len=\$3-\$2;lc=len*\$4;tl=tl+lc} END {OFMT = "%.0f";print tl/"'${task.cpus}'"}') - - awk -v x=\$DP '\$4 < x' ${prefix}_cov.split.stats | sort -k1,1 -k2,2 | awk -v cutoff=\$CC -v tt=\$TT 'BEGIN{i=1} - {len=\$3-\$2;lc=len*\$4;cov = cov + lc - if (NR == 1 && lc > tt) {x="mapped."i".bed";print \$1"\\t"\$2"\\t"\$3 > x; i=i+1; e=1} - else if ( cov < cutoff && lc < tt) {x="mapped."i".bed";print \$1"\\t"\$2"\\t"\$3 > x; e=0} - else if (lc > tt && e > 0 ) {x="mapped."i".bed"; print \$1"\\t"\$2"\\t"\$3 > x; cov=0;i=i+1; e=1} - else if (lc > tt && e < 1 ) {i=i+1; x="mapped."i".bed"; print \$1"\\t"\$2"\\t"\$3 > x; cov=0;i=i+1;e=1} - else if (cov > cutoff && lc < tt ) {i=i+1; x="mapped."i".bed"; print \$1"\\t"\$2"\\t"\$3 > x; cov=lc;e=0} - }' - cat <<-END_VERSIONS > versions.yml - "${task.process}": - BusyBox: \$(busybox | sed -n -E 's/.*v([[:digit:].]+)\\s\\(.*/\\1/p') - perl: \$(perl --version | sed -n -E '/^This is/ s/.*\\(v([[:digit:].]+)\\).*/\\1/p') - END_VERSIONS - """ - } -} \ No newline at end of file diff --git a/modules/local/prepare_forward_reads.nf b/modules/local/prepare_forward_reads.nf deleted file mode 100644 index b6ef4cc..0000000 --- a/modules/local/prepare_forward_reads.nf +++ /dev/null @@ -1,104 +0,0 @@ -process PREPARE_FORWARD_READS { - tag "${meta.id}" - label 
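Inside the deleted CREATE_INTERVALS, the perl one-liner fixes the coverage ceiling `DP`: column 4 is sorted descending and the value 0.1% of the way into the list is taken, roughly the 99.9th-percentile coverage (the reference branch uses 0.005% instead); the downstream awk filters then drop intervals at or above it. The same lookup worked through in Nextflow's Groovy with toy numbers:

```nextflow
def d = 0.001                        // denovo branch; the reference branch uses 0.00005
def coverages = (1..1000).toList()   // toy per-interval mean coverages
def desc = coverages.sort(false) { -it }   // sort -rn
def dp = desc[(d * desc.size()) as int]    // perl: $l[int($d*@l)]
println dp // 999 -> the awk '$4 < x' filters keep only intervals below this depth
```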
'process_medium' - - conda (params.enable_conda ? 'bioconda::perl-sys-info-driver-linux=0.7905' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/perl-sys-info-driver-linux:0.7905--pl5321hdfd78af_1' : - 'quay.io/biocontainers/perl-sys-info-driver-linux:0.7905--pl5321hdfd78af_1' }" - - input: - tuple val (meta), path (reads) - val (type) // sequencing technology used. Changes how unique sequences are identified - - output: - tuple val (meta), path ('*.uniq.seqs'), emit: indv_uniq_seqs - path 'versions.yml' , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - if (type == 'PE') { - def forward_reads = "${reads[0]}" - def reverse_reads = "${reads[1]}" - """ - gunzip -c ${forward_reads} | \\ - awk 'BEGIN{P=1}{if(P==1||P==2){gsub(/^[@]/,">");print}; if(P==4)P=0; P++}' | \\ - awk '!/>/' > ${prefix}.forward - - gunzip -c ${reverse_reads} | \\ - awk 'BEGIN{P=1}{if(P==1||P==2){gsub(/^[@]/,">");print}; if(P==4)P=0; P++}' | \\ - awk '!/>/' > ${prefix}.reverse - - paste -d '-' ${prefix}.forward ${prefix}.reverse | \\ - awk 'BEGIN{P=1}{if(P==1||P==2){gsub(/^[@]/,">");print}; if(P==4)P=0; P++}' | \\ - sed -e 's/-/NNNNNNNNNN/' | \\ - perl -e 'while (<>) {chomp; \$z{\$_}++;} while((\$k,\$v) = each(%z)) {print "\$v\\t\$k\\n";}' > ${prefix}.uniq.seqs - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - BusyBox: \$(busybox | sed -n -E 's/.*v([[:digit:].]+)\\s\\(.*/\\1/p') - perl: \$(perl --version | sed -n -E '/^This is/ s/.*\\(v([[:digit:].]+)\\).*/\\1/p') - END_VERSIONS - """ - } else if (type == 'RPE') { - def forward_reads = "${reads[0]}" - def reverse_reads = "${reads[1]}" - """ - gunzip -c ${forward_reads} | \\ - awk 'BEGIN{P=1}{if(P==1||P==2){gsub(/^[@]/,">");print}; if(P==4)P=0; P++}' | \\ - awk '!/>/' > ${prefix}.forward - - gunzip -c ${reverse_reads} | \\ - awk 'BEGIN{P=1}{if(P==1||P==2){gsub(/^[@]/,">");print}; if(P==4)P=0; P++}' | \\ - awk '!/>/' > ${prefix}.reverse - - paste ${prefix}.forward ${prefix}.reverse | sort -k1 -S 200M > ${prefix}.fr - cut -f1 ${prefix}.fr | uniq -c > ${prefix}.f.uniq && cut -f2 ${prefix}.fr > ${prefix}.r - awk '{for(i=0;i<\$1;i++)print}' ${prefix}.f.uniq > ${prefix}.f.uniq.e - - paste -d '-' ${prefix}.f.uniq.e ${prefix}.r | \\ - awk '!/NNN/'| \\ - sed -e 's/-/NNNNNNNNNN/' | \\ - sed -e 's/^[ \\t]*//' | \\ - sed -e 's/\\s/\\t/g' > ${prefix}.uniq.seqs - - rm ${prefix}.f.uniq.e ${prefix}.f.uniq ${prefix}.r ${prefix}.fr - -cat <<-END_VERSIONS > versions.yml -"${task.process}": - BusyBox: \$(busybox | sed -n -E 's/.*v([[:digit:].]+)\\s\\(.*/\\1/p') -END_VERSIONS - """ - } else if (type == 'SE'){ - def forward_reads = "${reads[0]}" - """ - gunzip -c ${forward_reads} | \\ - awk 'BEGIN{P=1}{if(P==1||P==2){gsub(/^[@]/,">");print}; if(P==4)P=0; P++}' | \\ - awk '!/>/' | \\ - perl -e 'while (<>) {chomp; \$z{\$_}++;} while((\$k,\$v) = each(%z)) {print "\$v\\t\$k\\n";}' > ${prefix}.uniq.seqs - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - BusyBox: \$(busybox | sed -n -E 's/.*v([[:digit:].]+)\\s\\(.*/\\1/p') - perl: \$(perl --version | sed -n -E '/^This is/ s/.*\\(v([[:digit:].]+)\\).*/\\1/p') - END_VERSIONS - """ - } else if (type == 'OL') { - """ - awk 'BEGIN{P=1}{if(P==1||P==2){gsub(/^[@]/,">");print}; if(P==4)P=0; P++}' ${assembled_fastq} | \\ - awk '!/>/' | \\ - perl -e 'while (<>) {chomp; \$z{\$_}++;} while((\$k,\$v) = each(%z)) {print "\$v\\t\$k\\n";}' > 
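The awk `BEGIN{P=1}...` idiom in the deleted PREPARE_FORWARD_READS walks FASTQ records four lines at a time and keeps only the sequence lines; the perl hash then tallies identical sequences into the `count<TAB>sequence` rows of `*.uniq.seqs`. A toy equivalent of that record walk and tally:

```nextflow
def fastq = ['@r1', 'ACGT', '+', 'IIII',
             '@r2', 'ACGT', '+', 'IIII',
             '@r3', 'TTTT', '+', 'IIII']

def seqs = fastq.collate(4).collect { it[1] } // sequence line of every 4-line record
println seqs.countBy { it }                   // [ACGT:2, TTTT:1] ~ the count<TAB>seq rows
```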
${prefix}.uniq.seqs - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - BusyBox: \$(busybox | sed -n -E 's/.*v([[:digit:].]+)\\s\\(.*/\\1/p') - perl: \$(perl --version | sed -n -E '/^This is/ s/.*\\(v([[:digit:].]+)\\).*/\\1/p') - END_VERSIONS - """ - } else { - error "invalid sequence type specified or is not supported: ${type}" - } -} \ No newline at end of file diff --git a/modules/local/rainbow/div/rainbow_div.nf b/modules/local/rainbow/div/rainbow_div.nf deleted file mode 100644 index 0a5faf0..0000000 --- a/modules/local/rainbow/div/rainbow_div.nf +++ /dev/null @@ -1,34 +0,0 @@ -process RAINBOW_DIV { - tag "${meta.id}" - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::rainbow=2.0.4' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/rainbow:2.0.4--hec16e2b_7' : - 'quay.io/biocontainers/rainbow:2.0.4--hec16e2b_7' }" - - input: - tuple val (meta), path (cluster) - - output: - tuple val (meta), path ("*_rbdiv.out") , emit: rbdiv - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - rainbow div \\ - -i ${cluster} \\ - -o ${prefix}_rbdiv.out \\ - ${args} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - rainbow: \$(rainbow | head -n 1 | cut -d ' ' -f 2) - END_VERSIONS - """ -} \ No newline at end of file diff --git a/modules/local/rainbow/merge/rainbow_merge.nf b/modules/local/rainbow/merge/rainbow_merge.nf deleted file mode 100644 index 77e2196..0000000 --- a/modules/local/rainbow/merge/rainbow_merge.nf +++ /dev/null @@ -1,38 +0,0 @@ -process RAINBOW_MERGE { - tag "${meta.id}" - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::rainbow=2.0.4' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/rainbow:2.0.4--hec16e2b_7' : - 'quay.io/biocontainers/rainbow:2.0.4--hec16e2b_7' }" - - input: - tuple val (meta), path (rbdiv) - val (type) - val (save_assembly) - - output: - tuple val (meta), path ("*_rbmerge.out"), emit: rbmerge - tuple val (meta), path ('*_rbmerge.log'), emit: log - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - def output_assembly = save_assembly ? '-a' : '' - def args = task.ext.args ?: '' - """ - rainbow merge -i ${rbdiv} -o ${prefix}_rbmerge.out \\ - ${args} \\ - ${output_assembly} \\ - 2> ${prefix}_rbmerge.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - rainbow: \$(rainbow | head -n 1 | cut -d ' ' -f 2) - END_VERSIONS - """ -} \ No newline at end of file diff --git a/modules/local/rbmerge2fasta.nf b/modules/local/rbmerge2fasta.nf deleted file mode 100644 index 660bffa..0000000 --- a/modules/local/rbmerge2fasta.nf +++ /dev/null @@ -1,67 +0,0 @@ -process RBMERGE2FASTA { - tag "${meta.id}" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::seqtk=1.3" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/seqtk:1.3--h5bf99c6_3' : - 'quay.io/biocontainers/seqtk:1.3--h5bf99c6_3' }" - - input: - tuple val (meta), path (rbdiv) - tuple val (meta), path (rbmerge) - - output: - tuple val (meta), path ('*_rainbow.fasta'), emit: fasta - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - LENGTH1=\$(cut -f3 ${rbdiv} | awk '(NR==1||length len1) { - c=c+1; print ">dDocent_A_Contig_" e "\\n" seq2 "NNNNNNNNNN" seq1; seq1=0; seq2=0;lenp=0;e=\$2;fclus=0;len1=0;freqp=0;lenf=0 - } - else if (\$1 ~/E/ && lenp <= len1) { - c=c+1; print ">dDocent_Contig_" e "\\n" seq1; seq1=0; seq2=0;lenp=0;e=\$2;fclus=0;len1=0;freqp=0;lenf=0 - } - else if (\$1 ~/C/) clus=\$2; - else if (\$1 ~/L/) len=\$2; - else if (\$1 ~/S/) seq=\$2; - else if (\$1 ~/N/) freq=\$2; - else if (\$1 ~/R/ && \$0 ~/0/ && \$0 !~/1/ && len > lenf) { - seq1 = seq; fclus=clus;lenf=len - } - else if (\$1 ~/R/ && \$0 ~/0/ && \$0 ~/1/ && \$0 ~/^R 0/ && len <= mlen) { - seq1 = seq; fclus=clus;lenf=len - } - else if (\$1 ~/R/ && \$0 ~/0/ && \$0 ~/1/ && \$0 ~!/^R 0/ && len > mlen) { - seq1 = seq; fclus=clus; len1=len - } - else if (\$1 ~/R/ && \$0 ~/0/ && \$0 ~/1/ && \$0 ~!/^R 0/ && len <= mlen) { - seq1 = seq; fclus=clus; lenf=len - } - else if (\$1 ~/R/ && \$0 ~!/0/ && freq > freqp && len >= lenp || \$1 ~/R/ && \$0 ~!/0/ && freq == freqp && len > lenp) { - seq2 = seq; lenp = len; freqp=freq - } - }' > ${prefix}_rainbow.fasta - - seqtk seq -r ${prefix}_rainbow.fasta > ${prefix}_rainbow.RC.fasta - mv ${prefix}_rainbow.RC.fasta ${prefix}_rainbow.fasta - -cat <<-END_VERSIONS > versions.yml -"${task.process}": - BusyBox: \$(busybox | sed -n -E 's/.*v([[:digit:].]+)\\s\\(.*/\\1/p') - seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') -END_VERSIONS - """ -} \ No newline at end of file diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index 8f8b5ae..0000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,27 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - script: // This script is bundled with the pipeline, in nf-core/radseq/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/nf-core/bcftools/concat/main.nf b/modules/nf-core/bcftools/concat/main.nf deleted file mode 100644 index c7c39d9..0000000 --- a/modules/nf-core/bcftools/concat/main.nf +++ /dev/null @@ -1,46 +0,0 @@ -process BCFTOOLS_CONCAT { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::bcftools=1.16" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
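SAMPLESHEET_CHECK and its bundled `check_samplesheet.py` are dropped here; given the `utils_nfschema_plugin` subworkflow newly tracked in `modules.json`, the likely replacement is schema-driven validation via the nf-schema plugin. A minimal sketch, assuming the plugin is enabled in `nextflow.config` and the conventional `assets/schema_input.json` exists:

```nextflow
include { samplesheetToList } from 'plugin/nf-schema'

workflow {
    // each element is one validated row of --input
    ch_samplesheet = Channel.fromList(
        samplesheetToList(params.input, 'assets/schema_input.json')
    )
    ch_samplesheet.view()
}
```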
- 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1': - 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }" - - input: - tuple val(meta), path(vcfs), path(tbi) - - output: - tuple val(meta), path("*.gz"), emit: vcf - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - """ - bcftools concat \\ - --output ${prefix}.vcf.gz \\ - $args \\ - --threads $task.cpus \\ - ${vcfs} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ - - stub: - prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.vcf.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/bcftools/concat/meta.yml b/modules/nf-core/bcftools/concat/meta.yml deleted file mode 100644 index e8c83cd..0000000 --- a/modules/nf-core/bcftools/concat/meta.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: bcftools_concat -description: Concatenate VCF files -keywords: - - variant calling - - concat - - bcftools - - VCF - -tools: - - concat: - description: | - Concatenate VCF files. - homepage: http://samtools.github.io/bcftools/bcftools.html - documentation: http://www.htslib.org/doc/bcftools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - vcfs: - type: files - description: | - List containing 2 or more vcf files - e.g. [ 'file1.vcf', 'file2.vcf' ] - - tbi: - type: files - description: | - List containing 2 or more index files (optional) - e.g. [ 'file1.tbi', 'file2.tbi' ] -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - vcf: - type: file - description: VCF concatenated output file - pattern: "*.{vcf.gz}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@abhi18av" - - "@nvnieuwk" diff --git a/modules/nf-core/bcftools/sort/bcftools-sort.diff b/modules/nf-core/bcftools/sort/bcftools-sort.diff deleted file mode 100644 index 9253028..0000000 --- a/modules/nf-core/bcftools/sort/bcftools-sort.diff +++ /dev/null @@ -1,18 +0,0 @@ -Changes in module 'nf-core/bcftools/sort' ---- modules/nf-core/bcftools/sort/main.nf -+++ modules/nf-core/bcftools/sort/main.nf -@@ -20,10 +20,11 @@ - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" -+ def interval = meta.interval ? '_' + meta.interval : '' - """ - bcftools \\ - sort \\ -- --output ${prefix}.vcf.gz \\ -+ --output ${prefix}${interval}.vcf.gz \\ - $args \\ - $vcf - - -************************************************************ diff --git a/modules/nf-core/bcftools/sort/main.nf b/modules/nf-core/bcftools/sort/main.nf deleted file mode 100644 index 41f1b73..0000000 --- a/modules/nf-core/bcftools/sort/main.nf +++ /dev/null @@ -1,48 +0,0 @@ -process BCFTOOLS_SORT { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::bcftools=1.16" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
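The local patch to `bcftools/sort` folds `meta.interval`, when present, into the output filename so that per-interval VCFs from the same sample cannot collide before BCFTOOLS_CONCAT stitches them back together. A toy channel showing just the naming effect (sample and interval values invented; the interval names echo the `mapped.*.bed` files from CREATE_INTERVALS):

```nextflow
workflow {
    Channel.of(
        [[id: 'sampleA', interval: 'mapped.1'], 'a1.vcf.gz'],
        [[id: 'sampleA', interval: 'mapped.2'], 'a2.vcf.gz']
    )
    .map { meta, vcf ->
        def interval = meta.interval ? '_' + meta.interval : ''
        "${meta.id}${interval}.vcf.gz" // the --output name built by the patched module
    }
    .view() // sampleA_mapped.1.vcf.gz / sampleA_mapped.2.vcf.gz
}
```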
- 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1': - 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }" - - input: - tuple val(meta), path(vcf) - - output: - tuple val(meta), path("*.gz"), emit: vcf - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def interval = meta.interval ? '_' + meta.interval : '' - """ - bcftools \\ - sort \\ - --output ${prefix}${interval}.vcf.gz \\ - $args \\ - $vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - - """ - touch ${prefix}.vcf.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/bcftools/sort/meta.yml b/modules/nf-core/bcftools/sort/meta.yml deleted file mode 100644 index 0c244a4..0000000 --- a/modules/nf-core/bcftools/sort/meta.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: bcftools_sort -description: Sorts VCF files -keywords: - - sorting - - VCF - - variant calling -tools: - - sort: - description: Sort VCF files by coordinates. - homepage: http://samtools.github.io/bcftools/bcftools.html - documentation: http://www.htslib.org/doc/bcftools.html - tool_dev_url: https://github.com/samtools/bcftools - doi: "10.1093/bioinformatics/btp352" - licence: ["MIT"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - vcf: - type: file - description: The VCF/BCF file to be sorted - pattern: "*.{vcf.gz,vcf,bcf}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - vcf: - type: file - description: Sorted VCF file - pattern: "*.{vcf.gz}" - -authors: - - "@Gwennid" diff --git a/modules/nf-core/bedtools/bamtobed/main.nf b/modules/nf-core/bedtools/bamtobed/main.nf deleted file mode 100644 index e967357..0000000 --- a/modules/nf-core/bedtools/bamtobed/main.nf +++ /dev/null @@ -1,35 +0,0 @@ -process BEDTOOLS_BAMTOBED { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::bedtools=2.30.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" - - input: - tuple val(meta), path(bam) - - output: - tuple val(meta), path("*.bed"), emit: bed - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - bedtools \\ - bamtobed \\ - $args \\ - -i $bam \\ - > ${prefix}.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/bedtools/bamtobed/meta.yml b/modules/nf-core/bedtools/bamtobed/meta.yml deleted file mode 100644 index 5a4ff73..0000000 --- a/modules/nf-core/bedtools/bamtobed/meta.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: bedtools_bamtobed -description: Converts a bam file to a bed12 file. 
-keywords: - - bam - - bed -tools: - - bedtools: - description: | - A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. - documentation: https://bedtools.readthedocs.io/en/latest/content/tools/complement.html - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: Input BAM file - pattern: "*.{bam}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bed: - type: file - description: Bed file containing genomic intervals. - pattern: "*.{bed}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@yuukiiwa" - - "@drpatelh" diff --git a/modules/nf-core/bedtools/coverage/bedtools-coverage.diff b/modules/nf-core/bedtools/coverage/bedtools-coverage.diff deleted file mode 100644 index 8669f37..0000000 --- a/modules/nf-core/bedtools/coverage/bedtools-coverage.diff +++ /dev/null @@ -1,37 +0,0 @@ -Changes in module 'nf-core/bedtools/coverage' ---- modules/nf-core/bedtools/coverage/main.nf -+++ modules/nf-core/bedtools/coverage/main.nf -@@ -1,5 +1,4 @@ - process BEDTOOLS_COVERAGE { -- tag "$meta.id" - label 'process_medium' - - conda "bioconda::bedtools=2.30.0" -@@ -12,14 +11,14 @@ - path genome_file - - output: -- tuple val(meta), path("*.bed"), emit: bed -+ tuple val(meta), path("*.cov"), emit: bed - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: -- def args = task.ext.args ?: '' -+ def args = task.ext.args ? task.ext.args + ' -counts' : '-counts' - def prefix = task.ext.prefix ?: "${meta.id}" - def reference = genome_file ? "-g ${genome_file} -sorted" : "" - """ -@@ -29,7 +28,7 @@ - $reference \\ - -a $input_A \\ - -b $input_B \\ -- > ${prefix}.bed -+ > ${prefix}.cov - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - -************************************************************ diff --git a/modules/nf-core/bedtools/coverage/main.nf b/modules/nf-core/bedtools/coverage/main.nf deleted file mode 100644 index d171187..0000000 --- a/modules/nf-core/bedtools/coverage/main.nf +++ /dev/null @@ -1,38 +0,0 @@ -process BEDTOOLS_COVERAGE { - label 'process_medium' - - conda "bioconda::bedtools=2.30.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--h468198e_3': - 'quay.io/biocontainers/bedtools:2.30.0--h468198e_3' }" - - input: - tuple val(meta), path(input_A), path(input_B) - path genome_file - - output: - tuple val(meta), path("*.cov"), emit: bed - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ? task.ext.args + ' -counts' : '-counts' - def prefix = task.ext.prefix ?: "${meta.id}" - def reference = genome_file ? 
"-g ${genome_file} -sorted" : "" - """ - bedtools \\ - coverage \\ - $args \\ - $reference \\ - -a $input_A \\ - -b $input_B \\ - > ${prefix}.cov - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(echo \$(bedtools --version 2>&1) | sed 's/^.*bedtools v//' )) - END_VERSIONS - """ -} diff --git a/modules/nf-core/bedtools/coverage/meta.yml b/modules/nf-core/bedtools/coverage/meta.yml deleted file mode 100644 index bb90987..0000000 --- a/modules/nf-core/bedtools/coverage/meta.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: "bedtools_coverage" -description: computes both the depth and breadth of coverage of features in file B on the features in file A -keywords: - - bedtools - - coverage - - bam - - bed - - gff - - vcf - - histogram -tools: - - "bedtools": - description: "A powerful toolset for genome arithmetic" - homepage: "https://bedtools.readthedocs.io/en/latest/index.html" - documentation: "https://bedtools.readthedocs.io/en/latest/content/bedtools-suite.html" - tool_dev_url: "https://github.com/arq5x/bedtools2" - doi: "10.1093/bioinformatics/btq033" - licence: "['GPL v2', 'MIT']" - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input_A: - type: file - description: BAM/BED/GFF/VCF file - pattern: "*.{bam,bed,gff,vcf}" - - input_B: - type: file - description: One or more BAM/BED/GFF/VCF file - pattern: "*.{bam,bed,gff,vcf}" - - genome_file: - type: file - description: | - Optional reference genome 2 column file that defines the expected chromosome order - in the input files for use with the -sorted option. - When `genome_file` is provided, `-sorted` option is added to the command. - pattern: "*.{fai,txt,chromsizes}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bed: - type: file - description: File containing coverage of sequence alignments - pattern: "*.bed" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@priyanka-surana" diff --git a/modules/nf-core/bedtools/intersect/main.nf b/modules/nf-core/bedtools/intersect/main.nf deleted file mode 100644 index 2bd1fe5..0000000 --- a/modules/nf-core/bedtools/intersect/main.nf +++ /dev/null @@ -1,54 +0,0 @@ -process BEDTOOLS_INTERSECT { - tag "$meta.id" - label 'process_single' - - conda "bioconda::bedtools=2.30.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" - - input: - tuple val(meta), path(intervals1), path(intervals2) - val extension - - output: - tuple val(meta), path("*.${extension}"), emit: intersect - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - if ("$intervals1" == "${prefix}.${extension}" || - "$intervals2" == "${prefix}.${extension}") - error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
- """ - bedtools \\ - intersect \\ - -a $intervals1 \\ - -b $intervals2 \\ - $args \\ - > ${prefix}.${extension} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - if ("$intervals1" == "${prefix}.${extension}" || - "$intervals2" == "${prefix}.${extension}") - error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - """ - touch ${prefix}.${extension} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/bedtools/intersect/meta.yml b/modules/nf-core/bedtools/intersect/meta.yml deleted file mode 100644 index 6e21e92..0000000 --- a/modules/nf-core/bedtools/intersect/meta.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: bedtools_intersect -description: Allows one to screen for overlaps between two sets of genomic features. -keywords: - - bed - - intersect -tools: - - bedtools: - description: | - A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. - documentation: https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - intervals1: - type: file - description: BAM/BED/GFF/VCF - pattern: "*.{bam|bed|gff|vcf}" - - intervals2: - type: file - description: BAM/BED/GFF/VCF - pattern: "*.{bam|bed|gff|vcf}" - - extension: - type: value - description: Extension of the output file. It is set by the user and corresponds to the file format which depends on arguments (e. g., ".bed", ".bam", ".txt", etc.). -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - intersect: - type: file - description: File containing the description of overlaps found between the two features - pattern: "*.${extension}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@Emiller88" - - "@sruthipsuresh" - - "@drpatelh" - - "@sidorov-si" diff --git a/modules/nf-core/bedtools/makewindows/bedtools-makewindows.diff b/modules/nf-core/bedtools/makewindows/bedtools-makewindows.diff deleted file mode 100644 index 9ef3e10..0000000 --- a/modules/nf-core/bedtools/makewindows/bedtools-makewindows.diff +++ /dev/null @@ -1,63 +0,0 @@ -Changes in module 'nf-core/bedtools/makewindows' ---- modules/nf-core/bedtools/makewindows/main.nf -+++ modules/nf-core/bedtools/makewindows/main.nf -@@ -1,8 +1,8 @@ - process BEDTOOLS_MAKEWINDOWS { - tag "$meta.id" -- label 'process_single' -+ label 'process_medium' - -- conda "bioconda::bedtools=2.30.0" -+ conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--h7d7f7ad_1' : - 'quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_1' }" -@@ -10,10 +10,14 @@ - input: - tuple val(meta), path(regions) - val(use_bed) -+ val(lengths) -+ val(coverage_threshold) - - output: -- tuple val(meta), path("*.tab"), emit: tab -- path "versions.yml" , emit: versions -+ tuple val(meta), path("*.tab") , emit: tab -+ tuple val(meta), path("*_cov.low.stats") , emit: low_cov -+ tuple val(meta), path("*_cov.high.stats"), emit: high_cov -+ path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when -@@ -23,15 +27,23 @@ - def prefix = task.ext.prefix ?: "${meta.id}" - def arg_input = use_bed ? "-b $regions" : "-g $regions" - """ -+ echo "${lengths.join("\n")}" > "${prefix}_lengths.txt" -+ MaxLen=\$(awk '{ print length() | "sort -rn" }' "${prefix}_lengths.txt" | head -1) -+ #split cov.stats file into high and low coverage intervals -+ awk '\$4 > ${coverage_threshold}' ${regions} > ${prefix}_cov.high.stats -+ awk '\$4 <= ${coverage_threshold}' ${regions} > ${prefix}_cov.low.stats -+ MaxLen2=\$(("\$MaxLen" / 2)) -+ ML1=\$(("\$MaxLen2" + 1)) - bedtools \\ - makewindows \\ -- ${arg_input} \\ -+ -b ${prefix}_cov.high.stats \\ -+ -w \$MaxLen2 -s \$ML1 \\ - $args \\ - > ${prefix}.tab -- -- cat <<-END_VERSIONS > versions.yml -- "${task.process}": -- bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") -- END_VERSIONS -+ -+cat <<-END_VERSIONS > versions.yml -+"${task.process}": -+ bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") -+END_VERSIONS - """ --} -+} -************************************************************ diff --git a/modules/nf-core/bedtools/makewindows/main.nf b/modules/nf-core/bedtools/makewindows/main.nf deleted file mode 100644 index 5106133..0000000 --- a/modules/nf-core/bedtools/makewindows/main.nf +++ /dev/null @@ -1,49 +0,0 @@ -process BEDTOOLS_MAKEWINDOWS { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--h7d7f7ad_1' : - 'quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_1' }" - - input: - tuple val(meta), path(regions) - val(use_bed) - val(lengths) - val(coverage_threshold) - - output: - tuple val(meta), path("*.tab") , emit: tab - tuple val(meta), path("*_cov.low.stats") , emit: low_cov - tuple val(meta), path("*_cov.high.stats"), emit: high_cov - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def arg_input = use_bed ? 
"-b $regions" : "-g $regions" - """ - echo "${lengths.join("\n")}" > "${prefix}_lengths.txt" - MaxLen=\$(awk '{ print length() | "sort -rn" }' "${prefix}_lengths.txt" | head -1) - #split cov.stats file into high and low coverage intervals - awk '\$4 > ${coverage_threshold}' ${regions} > ${prefix}_cov.high.stats - awk '\$4 <= ${coverage_threshold}' ${regions} > ${prefix}_cov.low.stats - MaxLen2=\$(("\$MaxLen" / 2)) - ML1=\$(("\$MaxLen2" + 1)) - bedtools \\ - makewindows \\ - -b ${prefix}_cov.high.stats \\ - -w \$MaxLen2 -s \$ML1 \\ - $args \\ - > ${prefix}.tab - -cat <<-END_VERSIONS > versions.yml -"${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") -END_VERSIONS - """ -} \ No newline at end of file diff --git a/modules/nf-core/bedtools/makewindows/meta.yml b/modules/nf-core/bedtools/makewindows/meta.yml deleted file mode 100644 index 9de31f4..0000000 --- a/modules/nf-core/bedtools/makewindows/meta.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: bedtools_makewindows - -description: Makes adjacent or sliding windows across a genome or BED file. -keywords: - - bed - - windows -tools: - - bedtools: - description: A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. - homepage: https://bedtools.readthedocs.io - documentation: https://bedtools.readthedocs.io/en/latest/content/tools/makewindows.html - tool_dev_url: None - doi: "10.1093/bioinformatics/btq033" - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - regions: - type: file - description: BED file OR Genome details file () - pattern: "*.{bed,fai,tab}" - - use_bed: - type: boolean - description: true = input is a BED file; false = input is a genome details file -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - tab: - type: file - description: Windows TAB file (BED or BED-like format) - pattern: "*.tab" -authors: - - "@kevbrick" diff --git a/modules/nf-core/bedtools/merge/bedtools-merge.diff b/modules/nf-core/bedtools/merge/bedtools-merge.diff deleted file mode 100644 index a4ae4b5..0000000 --- a/modules/nf-core/bedtools/merge/bedtools-merge.diff +++ /dev/null @@ -1,54 +0,0 @@ -Changes in module 'nf-core/bedtools/merge' ---- modules/nf-core/bedtools/merge/main.nf -+++ modules/nf-core/bedtools/merge/main.nf -@@ -1,36 +1,38 @@ --process BEDTOOLS_MERGE { -+process BEDTOOLS_MERGE_COV { - tag "$meta.id" -- label 'process_single' -+ label 'process_high_memory' - -- conda "bioconda::bedtools=2.30.0" -+ conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" - - input: -- tuple val(meta), path(bed) -+ tuple val(meta), path(cov) -+ path(faidx) - - output: -- tuple val(meta), path('*.bed'), emit: bed -+ tuple val(meta), path('*.cov'), emit: cov - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: -- def args = task.ext.args ?: '' -+ def args = task.ext.args ? 
task.ext.args + ' -c 4 -o sum' : '-c 4 -o sum' - def prefix = task.ext.prefix ?: "${meta.id}" -- if ("$bed" == "${prefix}.bed") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" -+ if ("$cov" == "${prefix}.cov") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - """ -+ cat ${cov} | \\ -+ bedtools sort -i - -faidx ${faidx} | \\ - bedtools \\ - merge \\ -- -i $bed \\ -+ -i - \\ - $args \\ -- > ${prefix}.bed -- -+ > ${prefix}.cov - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS - """ --} -+} -************************************************************ diff --git a/modules/nf-core/bedtools/merge/main.nf b/modules/nf-core/bedtools/merge/main.nf deleted file mode 100644 index c16fdf9..0000000 --- a/modules/nf-core/bedtools/merge/main.nf +++ /dev/null @@ -1,38 +0,0 @@ -process BEDTOOLS_MERGE_COV { - tag "$meta.id" - label 'process_high_memory' - - conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" - - input: - tuple val(meta), path(cov) - path(faidx) - - output: - tuple val(meta), path('*.cov'), emit: cov - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ? task.ext.args + ' -c 4 -o sum' : '-c 4 -o sum' - def prefix = task.ext.prefix ?: "${meta.id}" - if ("$cov" == "${prefix}.cov") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - """ - cat ${cov} | \\ - bedtools sort -i - -faidx ${faidx} | \\ - bedtools \\ - merge \\ - -i - \\ - $args \\ - > ${prefix}.cov - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS - """ -} \ No newline at end of file diff --git a/modules/nf-core/bedtools/merge/meta.yml b/modules/nf-core/bedtools/merge/meta.yml deleted file mode 100644 index 7674367..0000000 --- a/modules/nf-core/bedtools/merge/meta.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: bedtools_merge -description: combines overlapping or “book-ended” features in an interval file into a single feature which spans all of the combined features. -keywords: - - bed - - merge -tools: - - bedtools: - description: | - A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. - documentation: https://bedtools.readthedocs.io/en/latest/content/tools/merge.html - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bed: - type: file - description: Input BED file - pattern: "*.{bed}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - bed: - type: file - description: Overlapped bed file with combined features - pattern: "*.{bed}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@Emiller88" - - "@sruthipsuresh" - - "@drpatelh" diff --git a/modules/nf-core/bedtools/sort/bedtools-sort.diff b/modules/nf-core/bedtools/sort/bedtools-sort.diff deleted file mode 100644 index 8a3712b..0000000 --- a/modules/nf-core/bedtools/sort/bedtools-sort.diff +++ /dev/null @@ -1,32 +0,0 @@ -Changes in module 'nf-core/bedtools/sort' ---- modules/nf-core/bedtools/sort/main.nf -+++ modules/nf-core/bedtools/sort/main.nf -@@ -12,7 +12,7 @@ - path genome_file - - output: -- tuple val(meta), path("*.${extension}"), emit: sorted -+ tuple val(meta), path("*_sorted.${extension}"), emit: sorted - path "versions.yml" , emit: versions - - when: -@@ -23,7 +23,7 @@ - def prefix = task.ext.prefix ?: "${meta.id}" - def genome_cmd = genome_file ? "-g $genome_file" : "" - extension = task.ext.suffix ?: intervals.extension -- if ("$intervals" == "${prefix}.${extension}") { -+ if ("$intervals" == "${prefix}_sorted.${extension}") { - error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - } - """ -@@ -32,7 +32,7 @@ - -i $intervals \\ - $genome_cmd \\ - $args \\ -- > ${prefix}.${extension} -+ > ${prefix}_sorted.${extension} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - -************************************************************ diff --git a/modules/nf-core/bedtools/sort/main.nf b/modules/nf-core/bedtools/sort/main.nf deleted file mode 100644 index 4122af6..0000000 --- a/modules/nf-core/bedtools/sort/main.nf +++ /dev/null @@ -1,42 +0,0 @@ -process BEDTOOLS_SORT { - tag "$meta.id" - label 'process_single' - - conda "bioconda::bedtools=2.30.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" - - input: - tuple val(meta), path(intervals) - path genome_file - - output: - tuple val(meta), path("*_sorted.${extension}"), emit: sorted - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def genome_cmd = genome_file ? "-g $genome_file" : "" - extension = task.ext.suffix ?: intervals.extension - if ("$intervals" == "${prefix}_sorted.${extension}") { - error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - } - """ - bedtools \\ - sort \\ - -i $intervals \\ - $genome_cmd \\ - $args \\ - > ${prefix}_sorted.${extension} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/bedtools/sort/meta.yml b/modules/nf-core/bedtools/sort/meta.yml deleted file mode 100644 index 1b6ebbc..0000000 --- a/modules/nf-core/bedtools/sort/meta.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: bedtools_sort -description: Sorts a feature file by chromosome and other criteria. -keywords: - - bed - - sort -tools: - - bedtools: - description: | - A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. 
- documentation: https://bedtools.readthedocs.io/en/latest/content/tools/sort.html - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - intervals: - type: file - description: BED/BEDGRAPH - pattern: "*.{bed|bedGraph}" - - genome_file: - type: file - description: | - Optional reference genome 2 column file that defines the expected chromosome order. - pattern: "*.{fai,txt,chromsizes}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - - sorted: - type: file - description: Sorted output file - pattern: "*.${extension}" - - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@Emiller88" - - "@sruthipsuresh" - - "@drpatelh" - - "@chris-cheshire" - - "@adamrtalbot" diff --git a/modules/nf-core/bwa/index/main.nf b/modules/nf-core/bwa/index/main.nf deleted file mode 100644 index 7ccf311..0000000 --- a/modules/nf-core/bwa/index/main.nf +++ /dev/null @@ -1,51 +0,0 @@ -process BWA_INDEX { - tag "$fasta" - label 'process_single' - - conda "bioconda::bwa=0.7.17" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : - 'quay.io/biocontainers/bwa:0.7.17--hed695b0_7' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path(bwa) , emit: index - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - mkdir bwa - bwa \\ - index \\ - $args \\ - -p bwa/${fasta.baseName} \\ - $fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ - - stub: - """ - mkdir bwa - - touch bwa/genome.amb - touch bwa/genome.ann - touch bwa/genome.bwt - touch bwa/genome.pac - touch bwa/genome.sa - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/bwa/index/meta.yml b/modules/nf-core/bwa/index/meta.yml deleted file mode 100644 index 2c6cfcd..0000000 --- a/modules/nf-core/bwa/index/meta.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: bwa_index -description: Create BWA index for reference genome -keywords: - - index - - fasta - - genome - - reference -tools: - - bwa: - description: | - BWA is a software package for mapping DNA sequences against - a large reference genome, such as the human genome. - homepage: http://bio-bwa.sourceforge.net/ - documentation: http://www.htslib.org/doc/samtools.html - arxiv: arXiv:1303.3997 - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Groovy Map containing reference information. - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Input genome fasta file -output: - - meta: - type: map - description: | - Groovy Map containing reference information. - e.g. 
[ id:'test', single_end:false ] - - index: - type: file - description: BWA genome index files - pattern: "*.{amb,ann,bwt,pac,sa}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@maxulysse" diff --git a/modules/nf-core/bwa/mem/bwa-mem.diff b/modules/nf-core/bwa/mem/bwa-mem.diff deleted file mode 100644 index 8927c85..0000000 --- a/modules/nf-core/bwa/mem/bwa-mem.diff +++ /dev/null @@ -1,102 +0,0 @@ -Changes in module 'nf-core/bwa/mem' ---- modules/nf-core/bwa/mem/main.nf -+++ modules/nf-core/bwa/mem/main.nf -@@ -1,16 +1,18 @@ - process BWA_MEM { - tag "$meta.id" -- label 'process_high' -+ label 'process_medium' - -- conda "bioconda::bwa=0.7.17 bioconda::samtools=1.16.1" -+ conda (params.enable_conda ? "bioconda::bwa=0.7.17 bioconda::samtools=1.15.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? -- 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' : -- 'quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' }" -+ 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:8110a70be2bfe7f75a2ea7f2a89cda4cc7732095-0' : -+ 'quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:8110a70be2bfe7f75a2ea7f2a89cda4cc7732095-0' }" - - input: - tuple val(meta), path(reads) - tuple val(meta2), path(index) - val sort_bam -+ val sequence_type -+ val lengths - - output: - tuple val(meta), path("*.bam"), emit: bam -@@ -20,24 +22,55 @@ - task.ext.when == null || task.ext.when - - script: -- def args = task.ext.args ?: '' -- def args2 = task.ext.args2 ?: '' -- def prefix = task.ext.prefix ?: "${meta.id}" -- def samtools_command = sort_bam ? 
'sort' : 'view' -- """ -- INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` -+ def args = task.ext.args ?: '' -+ def args2 = task.ext.args2 ?: '' -+ def args3 = task.ext.args3 ?: '' -+ def prefix = task.ext.prefix ?: "${meta.id}" -+ if (sequence_type == 'PE' && params.method == 'denovo') { -+ """ -+ INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` -+ -+ echo "${lengths.join("\n")}" > lengths.txt -+ MLEN=\$(awk '{ print length() | "sort -rn" }' lengths.txt | head -1) -+ INSERT=\$(( \$MLEN * 2 )) -+ INSERTH=\$(( \$INSERT + 100 )) -+ INSERTL=\$(( \$INSERT - 100 )) -+ SD=\$(( \$INSERT / 5 )) - -- bwa mem \\ -- $args \\ -- -t $task.cpus \\ -- \$INDEX \\ -- $reads \\ -- | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - -+ bwa mem \\ -+ $args \\ -+ -I \$INSERT,\$SD,\$INSERTH,\$INSERTL \\ -+ -R "@RG\\tID:${prefix}\\tSM:${prefix}\\tPL:Illumina" \\ -+ -t $task.cpus \\ -+ \$INDEX \\ -+ $reads \\ -+ | samtools view $args2 \\ -+ | samtools sort $args3 --threads $task.cpus -o ${prefix}.bam - - -- cat <<-END_VERSIONS > versions.yml -- "${task.process}": -- bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') -- samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') -- END_VERSIONS -- """ -+cat <<-END_VERSIONS > versions.yml -+"${task.process}": -+ bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') -+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') -+END_VERSIONS -+ """ -+ } else { -+ """ -+ INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` -+ -+ bwa mem \\ -+ $args \\ -+ -R "@RG\\tID:${prefix}\\tSM:${prefix}\\tPL:Illumina" \\ -+ -t $task.cpus \\ -+ \$INDEX \\ -+ $reads \\ -+ | samtools view $args2 \\ -+ | samtools sort $args3 --threads $task.cpus -o ${prefix}.bam - -+ -+cat <<-END_VERSIONS > versions.yml -+"${task.process}": -+ bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') -+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') -+END_VERSIONS -+ """ -+ } - } - -************************************************************ diff --git a/modules/nf-core/bwa/mem/main.nf b/modules/nf-core/bwa/mem/main.nf deleted file mode 100644 index 2fe8a8e..0000000 --- a/modules/nf-core/bwa/mem/main.nf +++ /dev/null @@ -1,76 +0,0 @@ -process BWA_MEM { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::bwa=0.7.17 bioconda::samtools=1.15.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:8110a70be2bfe7f75a2ea7f2a89cda4cc7732095-0' : - 'quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:8110a70be2bfe7f75a2ea7f2a89cda4cc7732095-0' }" - - input: - tuple val(meta), path(reads) - tuple val(meta2), path(index) - val sort_bam - val sequence_type - val lengths - - output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def args3 = task.ext.args3 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - if (sequence_type == 'PE' && params.method == 'denovo') { - """ - INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` - - echo "${lengths.join("\n")}" > lengths.txt - MLEN=\$(awk '{ print length() | "sort -rn" }' lengths.txt | head -1) - INSERT=\$(( \$MLEN * 2 )) - INSERTH=\$(( \$INSERT + 100 )) - INSERTL=\$(( \$INSERT - 100 )) - SD=\$(( \$INSERT / 5 )) - - bwa mem \\ - $args \\ - -I \$INSERT,\$SD,\$INSERTH,\$INSERTL \\ - -R "@RG\\tID:${prefix}\\tSM:${prefix}\\tPL:Illumina" \\ - -t $task.cpus \\ - \$INDEX \\ - $reads \\ - | samtools view $args2 \\ - | samtools sort $args3 --threads $task.cpus -o ${prefix}.bam - - -cat <<-END_VERSIONS > versions.yml -"${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') -END_VERSIONS - """ - } else { - """ - INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` - - bwa mem \\ - $args \\ - -R "@RG\\tID:${prefix}\\tSM:${prefix}\\tPL:Illumina" \\ - -t $task.cpus \\ - \$INDEX \\ - $reads \\ - | samtools view $args2 \\ - | samtools sort $args3 --threads $task.cpus -o ${prefix}.bam - - -cat <<-END_VERSIONS > versions.yml -"${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') -END_VERSIONS - """ - } -} diff --git a/modules/nf-core/bwa/mem/meta.yml b/modules/nf-core/bwa/mem/meta.yml deleted file mode 100644 index 62357bf..0000000 --- a/modules/nf-core/bwa/mem/meta.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: bwa_mem -description: Performs fastq alignment to a fasta reference using BWA -keywords: - - mem - - bwa - - alignment - - map - - fastq - - bam - - sam -tools: - - bwa: - description: | - BWA is a software package for mapping DNA sequences against - a large reference genome, such as the human genome. - homepage: http://bio-bwa.sourceforge.net/ - documentation: http://www.htslib.org/doc/samtools.html - arxiv: arXiv:1303.3997 - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. - - meta2: - type: map - description: | - Groovy Map containing reference information. - e.g. 
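
For reference, both mem modules locate the index prefix the same way, by finding any staged `.amb` file and stripping its suffix (output path below is illustrative):

```bash
# Discover the index basename inside the staged index directory
INDEX=$(find -L ./ -name "*.amb" | sed 's/\.amb$//')
echo "$INDEX"   # e.g. ./reference.fasta, handed to bwa mem as the index base
```
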
[ id:'test', single_end:false ] - - index: - type: file - description: BWA genome index files - pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" - - sort_bam: - type: boolean - description: use samtools sort (true) or samtools view (false) - pattern: "true or false" -output: - - bam: - type: file - description: Output BAM file containing read alignments - pattern: "*.{bam}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@jeremy1805" diff --git a/modules/nf-core/bwamem2/index/main.nf b/modules/nf-core/bwamem2/index/main.nf deleted file mode 100644 index a236121..0000000 --- a/modules/nf-core/bwamem2/index/main.nf +++ /dev/null @@ -1,49 +0,0 @@ -process BWAMEM2_INDEX { - tag "$fasta" - label 'process_single' - - conda "bioconda::bwa-mem2=2.2.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bwa-mem2:2.2.1--he513fc3_0' : - 'quay.io/biocontainers/bwa-mem2:2.2.1--he513fc3_0' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path("bwamem2"), emit: index - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - mkdir bwamem2 - bwa-mem2 \\ - index \\ - $args \\ - $fasta -p bwamem2/${fasta} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') - END_VERSIONS - """ - - stub: - """ - mkdir bwamem2 - touch bwamem2/${fasta}.0123 - touch bwamem2/${fasta}.ann - touch bwamem2/${fasta}.pac - touch bwamem2/${fasta}.amb - touch bwamem2/${fasta}.bwt.2bit.64 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') - END_VERSIONS - """ -} diff --git a/modules/nf-core/bwamem2/index/meta.yml b/modules/nf-core/bwamem2/index/meta.yml deleted file mode 100644 index 40c26c3..0000000 --- a/modules/nf-core/bwamem2/index/meta.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: bwamem2_index -description: Create BWA-mem2 index for reference genome -keywords: - - index - - fasta - - genome - - reference -tools: - - bwamem2: - description: | - BWA-mem2 is a software package for mapping DNA sequences against - a large reference genome, such as the human genome. - homepage: https://github.com/bwa-mem2/bwa-mem2 - documentation: https://github.com/bwa-mem2/bwa-mem2#usage - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Input genome fasta file -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - index: - type: file - description: BWA genome index files - pattern: "*.{0123,amb,ann,bwt.2bit.64,pac}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@maxulysse" diff --git a/modules/nf-core/bwamem2/mem/bwamem2-mem.diff b/modules/nf-core/bwamem2/mem/bwamem2-mem.diff deleted file mode 100644 index 2be4500..0000000 --- a/modules/nf-core/bwamem2/mem/bwamem2-mem.diff +++ /dev/null @@ -1,108 +0,0 @@ -Changes in module 'nf-core/bwamem2/mem' ---- modules/nf-core/bwamem2/mem/main.nf -+++ modules/nf-core/bwamem2/mem/main.nf -@@ -2,7 +2,7 @@ - tag "$meta.id" - label 'process_high' - -- conda "bioconda::bwa-mem2=2.2.1 bioconda::samtools=1.16.1" -+ conda (params.enable_conda ? "bioconda::bwa-mem2=2.2.1 bioconda::samtools=1.16.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' : - 'quay.io/biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' }" -@@ -11,6 +11,8 @@ - tuple val(meta), path(reads) - tuple val(meta2), path(index) - val sort_bam -+ val sequence_type -+ val lengths - - output: - tuple val(meta), path("*.bam"), emit: bam -@@ -20,28 +22,58 @@ - task.ext.when == null || task.ext.when - - script: -- def args = task.ext.args ?: '' -- def args2 = task.ext.args2 ?: '' -- def prefix = task.ext.prefix ?: "${meta.id}" -- def samtools_command = sort_bam ? 'sort' : 'view' -- """ -- INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` -+ def args = task.ext.args ?: '' -+ def args2 = task.ext.args2 ?: '' -+ def args3 = task.ext.args3 ?: '' -+ def prefix = task.ext.prefix ?: "${meta.id}" -+ if (sequence_type == 'PE' && params.method == 'denovo') { -+ """ -+ INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` -+ -+ echo "${lengths.join("\n")}" > lengths.txt -+ MLEN=\$(awk '{ print length() | "sort -rn" }' lengths.txt | head -1) -+ INSERT=\$(( \$MLEN * 2 )) -+ INSERTH=\$(( \$INSERT + 100 )) -+ INSERTL=\$(( \$INSERT - 100 )) -+ SD=\$(( \$INSERT / 5 )) -+ -+ bwa-mem2 \\ -+ mem \\ -+ $args \\ -+ -I \$INSERT,\$SD,\$INSERTH,\$INSERTL \\ -+ -R "@RG\\tID:${prefix}\\tSM:${prefix}\\tPL:Illumina" \\ -+ -t $task.cpus \\ -+ \$INDEX \\ -+ $reads \\ -+ | samtools view $args2 \\ -+ | samtools sort $args3 --threads $task.cpus -o ${prefix}.bam - -+ -+cat <<-END_VERSIONS > versions.yml -+"${task.process}": -+ bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') -+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') -+END_VERSIONS -+ """ -+ } else { -+ """ -+ INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` -+ bwa-mem2 \\ -+ mem \\ -+ $args \\ -+ -R "@RG\\tID:${prefix}\\tSM:${prefix}\\tPL:Illumina" \\ -+ -t $task.cpus \\ -+ \$INDEX \\ -+ $reads \\ -+ | samtools view $args2 \\ -+ | samtools sort $args3 --threads $task.cpus -o ${prefix}.bam - - -- bwa-mem2 \\ -- mem \\ -- $args \\ -- -t $task.cpus \\ -- \$INDEX \\ -- $reads \\ -- | samtools $samtools_command $args2 -@ $task.cpus -o ${prefix}.bam - -- -- cat <<-END_VERSIONS > versions.yml -- "${task.process}": -- bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') -- samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') -- END_VERSIONS -- """ -- -+cat <<-END_VERSIONS > versions.yml -+"${task.process}": -+ bwamem2: \$(echo 
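
Both mem modules now inject a read group at mapping time. A hypothetical expansion for a sample named `ind01` (the name is illustrative) shows what lands in the BAM header; the per-sample `SM` tag is what lets joint variant calling downstream keep individuals apart:

```bash
prefix=ind01
bwa-mem2 mem -R "@RG\tID:${prefix}\tSM:${prefix}\tPL:Illumina" \
    -t 4 bwamem2/reference.fasta ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz |
    samtools sort --threads 4 -o ${prefix}.bam -
samtools view -H ${prefix}.bam | grep '^@RG'
# @RG   ID:ind01   SM:ind01   PL:Illumina
```
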
\$(bwa-mem2 version 2>&1) | sed 's/.* //') -+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') -+END_VERSIONS -+ """ -+ } - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ -@@ -52,4 +84,4 @@ - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ --} -+} -************************************************************ diff --git a/modules/nf-core/bwamem2/mem/main.nf b/modules/nf-core/bwamem2/mem/main.nf deleted file mode 100644 index 135be74..0000000 --- a/modules/nf-core/bwamem2/mem/main.nf +++ /dev/null @@ -1,87 +0,0 @@ -process BWAMEM2_MEM { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? "bioconda::bwa-mem2=2.2.1 bioconda::samtools=1.16.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' : - 'quay.io/biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' }" - - input: - tuple val(meta), path(reads) - tuple val(meta2), path(index) - val sort_bam - val sequence_type - val lengths - - output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def args3 = task.ext.args3 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - if (sequence_type == 'PE' && params.method == 'denovo') { - """ - INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` - - echo "${lengths.join("\n")}" > lengths.txt - MLEN=\$(awk '{ print length() | "sort -rn" }' lengths.txt | head -1) - INSERT=\$(( \$MLEN * 2 )) - INSERTH=\$(( \$INSERT + 100 )) - INSERTL=\$(( \$INSERT - 100 )) - SD=\$(( \$INSERT / 5 )) - - bwa-mem2 \\ - mem \\ - $args \\ - -I \$INSERT,\$SD,\$INSERTH,\$INSERTL \\ - -R "@RG\\tID:${prefix}\\tSM:${prefix}\\tPL:Illumina" \\ - -t $task.cpus \\ - \$INDEX \\ - $reads \\ - | samtools view $args2 \\ - | samtools sort $args3 --threads $task.cpus -o ${prefix}.bam - - -cat <<-END_VERSIONS > versions.yml -"${task.process}": - bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') -END_VERSIONS - """ - } else { - """ - INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` - bwa-mem2 \\ - mem \\ - $args \\ - -R "@RG\\tID:${prefix}\\tSM:${prefix}\\tPL:Illumina" \\ - -t $task.cpus \\ - \$INDEX \\ - $reads \\ - | samtools view $args2 \\ - | samtools sort $args3 --threads $task.cpus -o ${prefix}.bam - - -cat <<-END_VERSIONS > versions.yml -"${task.process}": - bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') -END_VERSIONS - """ - } - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.bam - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} \ No newline at end of file diff --git a/modules/nf-core/bwamem2/mem/meta.yml b/modules/nf-core/bwamem2/mem/meta.yml deleted file mode 100644 index a465551..0000000 --- a/modules/nf-core/bwamem2/mem/meta.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: 
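
The index module that follows writes five files next to the chosen prefix, which is also what its stub touches; a minimal sketch:

```bash
mkdir bwamem2
bwa-mem2 index -p bwamem2/reference.fasta reference.fasta
ls bwamem2
# reference.fasta.0123  reference.fasta.amb  reference.fasta.ann
# reference.fasta.bwt.2bit.64  reference.fasta.pac
```
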
bwamem2_mem -description: Performs fastq alignment to a fasta reference using BWA -keywords: - - mem - - bwa - - alignment - - map - - fastq - - bam - - sam -tools: - - bwa: - description: | - BWA-mem2 is a software package for mapping DNA sequences against - a large reference genome, such as the human genome. - homepage: https://github.com/bwa-mem2/bwa-mem2 - documentation: http://www.htslib.org/doc/samtools.html - arxiv: arXiv:1303.3997 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. - - index: - type: file - description: BWA genome index files - pattern: "Directory containing BWA index *.{0132,amb,ann,bwt.2bit.64,pac}" - - sort_bam: - type: boolean - description: use samtools sort (true) or samtools view (false) - pattern: "true or false" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: Output BAM file containing read alignments - pattern: "*.{bam}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@maxulysse" diff --git a/modules/nf-core/cdhit/cdhit/cdhit-cdhit.diff b/modules/nf-core/cdhit/cdhit/cdhit-cdhit.diff deleted file mode 100644 index 5111f39..0000000 --- a/modules/nf-core/cdhit/cdhit/cdhit-cdhit.diff +++ /dev/null @@ -1,92 +0,0 @@ -Changes in module 'nf-core/cdhit/cdhit' ---- modules/nf-core/cdhit/cdhit/main.nf -+++ modules/nf-core/cdhit/cdhit/main.nf -@@ -1,36 +1,65 @@ --process CDHIT_CDHIT { -+process CDHIT { - tag "$meta.id" -- label 'process_medium' -+ label 'process_high' - -- conda "bioconda::cd-hit=4.8.1" -+ conda (params.enable_conda ? 'bioconda::cd-hit=4.8.1' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
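
Restated outside Nextflow, the paired-end branch of the patched CDHIT process in the hunk below reduces each merged unique sequence to its forward half (pairs were joined upstream with a ten-N spacer) and clusters those with `cd-hit-est`. Identity and word-size flags arrive via `task.ext.args`, so `-c 0.9 -g 1` here is only illustrative, as are the file names:

```bash
sed -e 's/NNNNNNNNNN/ /g' uniq.full.fasta | cut -f1 > uniq.F.fasta  # forward halves only
cd-hit-est -i uniq.F.fasta -o ind01 -M 0 -T 4 -c 0.9 -g 1
# cd-hit-est writes ind01 (representative sequences) and ind01.clstr;
# the module emits the .clstr plus uniq.F.fasta for reference construction.
```
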
-- 'https://depot.galaxyproject.org/singularity/cd-hit%3A4.8.1--h5b5514e_7': -- 'quay.io/biocontainers/cd-hit:4.8.1--h5b5514e_7' }" -+ 'https://depot.galaxyproject.org/singularity/cd-hit:4.8.1--hdbcaa40_0' : -+ 'quay.io/biocontainers/cd-hit:4.8.1--hdbcaa40_0' }" - - input: -- tuple val(meta), path(sequences) -+ tuple val (meta), path (fasta) // [[:], forward reads] -+ tuple val (meta2), path (totaluniqseq) -+ val type - - output: -- tuple val(meta), path("*.fasta") ,emit: fasta -- tuple val(meta), path("*.clstr") ,emit: clusters -- path "versions.yml" ,emit: versions -+ tuple val (meta), path ("*.clstr") , emit: cdhit_cluster -+ tuple val (meta), path ("uniq.F.fasta") , emit: forward_uniq -+ path 'versions.yml' , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: -+ def prefix = task.ext.prefix ?: "${meta.id}" - def args = task.ext.args ?: '' -- def prefix = task.ext.prefix ?: "${meta.id}" -- """ -- cd-hit \\ -- -i $sequences \\ -- -o ${prefix}.fasta \\ -- -M $task.memory.mega \\ -- -T $task.cpus -+ if (type == 'PE') { -+ """ -+ sed -e 's/NNNNNNNNNN/ /g' ${fasta} | cut -f1 > uniq.F.fasta -+ -+ cd-hit-est \\ -+ -i uniq.F.fasta \\ -+ -o ${prefix} \\ -+ -M ${task.memory.mega} \\ -+ -T ${task.cpus} \\ -+ ${args} - -- cat <<-END_VERSIONS > versions.yml -- "${task.process}": -- cdhit: \$(cd-hit -h | head -n 1 | sed 's/^.*====== CD-HIT version //;s/ (built on .*) ======//' ) -- END_VERSIONS -- """ --} -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ cdhit: \$(cd-hit -h | head -n 1 | sed 's/^.*====== CD-HIT version //;s/ (built on .*) ======//' ) -+ END_VERSIONS -+ """ -+ } else { -+ // with random end assembly versions totaluniqseq is based only on the forward reads -+ // uniq reduces down the dataset -+ // Gets rid of any reduncy -+ // cluster ids might match more than 1 reads -+ // Random Shearing -+ """ -+ sed -e 's/NNNNNNNNNN/ /g' ${totaluniqseq} | cut -f1 | sort | \\ -+ uniq | \\ -+ awk '{c= c + 1; print ">dDocent_Contig_" c "\\n" \$1}' > uniq.F.fasta -+ -+ cd-hit-est -i uniq.F.fasta \\ -+ -o ${prefix} \\ -+ -M ${task.memory.mega} \\ -+ -T ${task.cpus} \\ -+ ${args} -+ -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ cdhit: \$(cd-hit -h | head -n 1 | sed 's/^.*====== CD-HIT version //;s/ (built on .*) ======//' ) -+ END_VERSIONS -+ """ -+ } -+} -************************************************************ diff --git a/modules/nf-core/cdhit/cdhit/main.nf b/modules/nf-core/cdhit/cdhit/main.nf deleted file mode 100644 index 31b20fe..0000000 --- a/modules/nf-core/cdhit/cdhit/main.nf +++ /dev/null @@ -1,65 +0,0 @@ -process CDHIT { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? 'bioconda::cd-hit=4.8.1' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
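
The single-end / random-shearing branch above instead rebuilds `uniq.F.fasta` from `totaluniqseq`: duplicate forward sequences are collapsed and each survivor gets a sequential dDocent-style contig name before clustering. A sketch (output lines illustrative):

```bash
sed -e 's/NNNNNNNNNN/ /g' totaluniqseq | cut -f1 | sort | uniq |
    awk '{c = c + 1; print ">dDocent_Contig_" c "\n" $1}' > uniq.F.fasta
head -2 uniq.F.fasta
# >dDocent_Contig_1
# ATGCT...
```
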
- 'https://depot.galaxyproject.org/singularity/cd-hit:4.8.1--hdbcaa40_0' : - 'quay.io/biocontainers/cd-hit:4.8.1--hdbcaa40_0' }" - - input: - tuple val (meta), path (fasta) // [[:], forward reads] - tuple val (meta2), path (totaluniqseq) - val type - - output: - tuple val (meta), path ("*.clstr") , emit: cdhit_cluster - tuple val (meta), path ("uniq.F.fasta") , emit: forward_uniq - path 'versions.yml' , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - def args = task.ext.args ?: '' - if (type == 'PE') { - """ - sed -e 's/NNNNNNNNNN/ /g' ${fasta} | cut -f1 > uniq.F.fasta - - cd-hit-est \\ - -i uniq.F.fasta \\ - -o ${prefix} \\ - -M ${task.memory.mega} \\ - -T ${task.cpus} \\ - ${args} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - cdhit: \$(cd-hit -h | head -n 1 | sed 's/^.*====== CD-HIT version //;s/ (built on .*) ======//' ) - END_VERSIONS - """ - } else { - // with random end assembly versions totaluniqseq is based only on the forward reads - // uniq reduces down the dataset - // Gets rid of any reduncy - // cluster ids might match more than 1 reads - // Random Shearing - """ - sed -e 's/NNNNNNNNNN/ /g' ${totaluniqseq} | cut -f1 | sort | \\ - uniq | \\ - awk '{c= c + 1; print ">dDocent_Contig_" c "\\n" \$1}' > uniq.F.fasta - - cd-hit-est -i uniq.F.fasta \\ - -o ${prefix} \\ - -M ${task.memory.mega} \\ - -T ${task.cpus} \\ - ${args} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - cdhit: \$(cd-hit -h | head -n 1 | sed 's/^.*====== CD-HIT version //;s/ (built on .*) ======//' ) - END_VERSIONS - """ - } -} \ No newline at end of file diff --git a/modules/nf-core/cdhit/cdhit/meta.yml b/modules/nf-core/cdhit/cdhit/meta.yml deleted file mode 100644 index ae4c8eb..0000000 --- a/modules/nf-core/cdhit/cdhit/meta.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: "cdhit_cdhit" -description: Cluster protein sequences using sequence similarity -keywords: - - cluster - - protein - - alignment - - fasta -tools: - - "cdhit": - description: "Clusters and compares protein or nucleotide sequences" - homepage: "https://sites.google.com/view/cd-hit/home" - documentation: "https://github.com/weizhongli/cdhit/wiki" - tool_dev_url: "https://github.com/weizhongli/cdhit" - doi: "10.1093/bioinformatics/btl158" - licence: "['GPL v2']" - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - sequences: - type: file - description: fasta file of sequences to be clustered - pattern: "*.{fa,fasta}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ]
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-  - fasta:
-      type: file
-      description: fasta file of the representative sequences for each cluster
-      pattern: "*.{fasta}"
-  - clusters:
-      type: file
-      description: List of clusters
-      pattern: "*.{clstr}"
-
-authors:
-  - "@timslittle"
diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf
deleted file mode 100644
index 3df2176..0000000
--- a/modules/nf-core/custom/dumpsoftwareversions/main.nf
+++ /dev/null
@@ -1,24 +0,0 @@
-process CUSTOM_DUMPSOFTWAREVERSIONS {
-    label 'process_single'
-
-    // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
-    conda "bioconda::multiqc=1.13"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' :
-        'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }"
-
-    input:
-    path versions
-
-    output:
-    path "software_versions.yml"    , emit: yml
-    path "software_versions_mqc.yml", emit: mqc_yml
-    path "versions.yml"             , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    template 'dumpsoftwareversions.py'
-}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
deleted file mode 100644
index 60b546a..0000000
--- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml
+++ /dev/null
@@ -1,34 +0,0 @@
-name: custom_dumpsoftwareversions
-description: Custom module used to dump software versions within the nf-core pipeline template
-keywords:
-  - custom
-  - version
-tools:
-  - custom:
-      description: Custom module used to dump software versions within the nf-core pipeline template
-      homepage: https://github.com/nf-core/tools
-      documentation: https://github.com/nf-core/tools
-      licence: ["MIT"]
-input:
-  - versions:
-      type: file
-      description: YML file containing software versions
-      pattern: "*.yml"
-
-output:
-  - yml:
-      type: file
-      description: Standard YML file containing software versions
-      pattern: "software_versions.yml"
-  - mqc_yml:
-      type: file
-      description: MultiQC custom content YML file containing software versions
-      pattern: "software_versions_mqc.yml"
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-
-authors:
-  - "@drpatelh"
-  - "@grst"
diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
deleted file mode 100755
index da03340..0000000
--- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/usr/bin/env python
-
-
-"""Provide functions to merge multiple versions.yml files."""
-
-
-import yaml
-import platform
-from textwrap import dedent
-
-
-def _make_versions_html(versions):
-    """Generate a tabular HTML output of all versions for MultiQC."""
-    html = [
-        dedent(
-            """\\
-            <style>
-            #nf-core-versions tbody:nth-child(even) {
-                background-color: #f2f2f2;
-            }
-            </style>
-            <table class="table" style="white-space:pre" id="nf-core-versions">
-                <thead>
-                    <tr>
-                        <th> Process Name </th>
-                        <th> Software </th>
-                        <th> Version  </th>
-                    </tr>
-                </thead>
-            """
-        )
-    ]
-    for process, tmp_versions in sorted(versions.items()):
-        html.append("<tbody>")
-        for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
-            html.append(
-                dedent(
-                    f"""\\
-                    <tr>
-                        <td><samp>{process if (i == 0) else ''}</samp></td>
-                        <td><samp>{tool}</samp></td>
-                        <td><samp>{version}</samp></td>
-                    </tr>
-                    """
-                )
-            )
-        html.append("</tbody>")
-    html.append("</table>
") - return "\\n".join(html) - - -def main(): - """Load all version files and generate merged output.""" - versions_this_module = {} - versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, - } - - with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - - # aggregate versions by the module name (derived from fully-qualified process name) - versions_by_module = {} - for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - - versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", - } - - versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), - } - - with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) - with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - - with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) - - -if __name__ == "__main__": - main() diff --git a/modules/nf-core/fastp/fastp.diff b/modules/nf-core/fastp/fastp.diff deleted file mode 100644 index 7763642..0000000 --- a/modules/nf-core/fastp/fastp.diff +++ /dev/null @@ -1,230 +0,0 @@ -Changes in module 'nf-core/fastp' ---- modules/nf-core/fastp/main.nf -+++ modules/nf-core/fastp/main.nf -@@ -2,16 +2,17 @@ - tag "$meta.id" - label 'process_medium' - -- conda "bioconda::fastp=0.23.2" -+ conda (params.enable_conda ? 'bioconda::fastp=0.23.2' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastp:0.23.2--h79da9fb_0' : - 'quay.io/biocontainers/fastp:0.23.2--h79da9fb_0' }" - - input: - tuple val(meta), path(reads) -- path adapter_fasta -+ tuple val(meta2), path(uniq_full_fasta) - val save_trimmed_fail - val save_merged -+ val denovo_construction - - output: - tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads -@@ -21,6 +22,9 @@ - path "versions.yml" , emit: versions - tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail - tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged -+ tuple val(meta), path('*.uniq.fasta') , optional:true, emit: fasta -+ tuple val(meta), path('*.totaluniqseq') , optional:true, emit: totaluniqseq -+ //stdout emit: tosystem - - when: - task.ext.when == null || task.ext.when -@@ -28,35 +32,89 @@ - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" -- def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" - def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? 
"--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' -+ def umi_barcodes = "${meta.umi_barcodes}" ?: '' - // Added soft-links to original fastqs for consistent naming in MultiQC - // Use single ended for interleaved. Add --interleaved_in in config. - if ( task.ext.args?.contains('--interleaved_in') ) { - """ - [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz -- - fastp \\ - --stdout \\ - --in1 ${prefix}.fastq.gz \\ - --thread $task.cpus \\ - --json ${prefix}.fastp.json \\ - --html ${prefix}.fastp.html \\ -- $adapter_list \\ - $fail_fastq \\ - $args \\ - 2> ${prefix}.fastp.log \\ - | gzip -c > ${prefix}.fastp.fastq.gz -- -- cat <<-END_VERSIONS > versions.yml -- "${task.process}": -- fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") -- END_VERSIONS -- """ -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") -+ END_VERSIONS -+ """ -+ } else if (denovo_construction && meta.single_end) { -+ """ -+ MaxLen="\$(awk '!/>/' ${uniq_full_fasta} | \\ -+ awk '(NR==1||length fastp.log -+ -+ # Fastq back to Fasta -+ gunzip ${prefix}.fastp.fastq.gz -+ awk 'BEGIN{P=1}{if(P==1||P==2){gsub(/^[@]/,">");print}; if(P==4)P=0; P++}' ${prefix}.fastp.fastq | \\ -+ paste - - | \\ -+ sort -k1,1 -V | \\ -+ tr "\\t" "\\n" > ${prefix}.uniq.fasta -+ -+ awk '!/>/' ${prefix}.uniq.fasta > ${prefix}.totaluniqseq -+ -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") -+ BusyBox: \$(busybox | sed -n -E 's/.*v([[:digit:].]+)\\s\\(.*/\\1/p') -+ END_VERSIONS -+ """ -+ } else if (denovo_construction && !meta.single_end) { -+ """ -+ MaxLen="\$(awk '!/>/' ${uniq_full_fasta} | \\ -+ awk '(NR==1||length fastp.log -+ -+ # Fastq back to Fasta -+ gunzip ${prefix}.fastp.fastq.gz -+ awk 'BEGIN{P=1}{if(P==1||P==2){gsub(/^[@]/,">");print}; if(P==4)P=0; P++}' ${prefix}.fastp.fastq | \\ -+ paste - - | \\ -+ sort -k1,1 -V | \\ -+ tr "\\t" "\\n" > ${prefix}.uniq.fasta -+ -+ awk '!/>/' ${prefix}.uniq.fasta > ${prefix}.totaluniqseq -+ -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") -+ BusyBox: \$(busybox | sed -n -E 's/.*v([[:digit:].]+)\\s\\(.*/\\1/p') -+ END_VERSIONS -+ """ - } else if (meta.single_end) { - """ -- [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz -- -+ [ ! -f ${prefix}.fastq.gz ] && ln -sf ${reads} ${prefix}.fastq.gz - fastp \\ - --stdout \\ - --in1 ${prefix}.fastq.gz \\ -@@ -64,18 +122,17 @@ - --thread $task.cpus \\ - --json ${prefix}.fastp.json \\ - --html ${prefix}.fastp.html \\ -- $adapter_list \\ -- $fail_fastq \\ -- $args \\ -- 2> ${prefix}.fastp.log -- -- cat <<-END_VERSIONS > versions.yml -- "${task.process}": -- fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") -- END_VERSIONS -- """ -- } else { -+ $fail_fastq \\ -+ $args \\ -+ 2> ${prefix}.fastp.log -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") -+ END_VERSIONS -+ """ -+ } else if (!meta.single_end && meta.umi_barcodes) { - def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' -+ def umi_args = task.ext.umi_args ?: '' - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz - [ ! 
-f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz -@@ -86,14 +143,54 @@ - --out2 ${prefix}_2.fastp.fastq.gz \\ - --json ${prefix}.fastp.json \\ - --html ${prefix}.fastp.html \\ -- $adapter_list \\ - $fail_fastq \\ - $merge_fastq \\ - --thread $task.cpus \\ -- --detect_adapter_for_pe \\ -- $args \\ -- 2> ${prefix}.fastp.log -- -+ $args \\ -+ $umi_args \\ -+ 2> ${prefix}.fastp.log -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") -+ END_VERSIONS -+ """ -+ } else if (meta.single_end && meta.umi_barcodes) { -+ def umi_args = task.ext.umi_args ?: '' -+ """ -+ [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz -+ fastp \\ -+ --stdout \\ -+ --in1 ${prefix}.fastq.gz \\ -+ --out1 ${prefix}.fastp.fastq.gz \\ -+ --thread $task.cpus \\ -+ --json ${prefix}.fastp.json \\ -+ --html ${prefix}.fastp.html \\ -+ $fail_fastq \\ -+ $umi_args \\ -+ $args \\ -+ 2> ${prefix}.fastp.log -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") -+ END_VERSIONS -+ """ -+ } else { -+ def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' -+ """ -+ [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz -+ [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz -+ fastp \\ -+ --in1 ${prefix}_1.fastq.gz \\ -+ --in2 ${prefix}_2.fastq.gz \\ -+ --out1 ${prefix}_1.fastp.fastq.gz \\ -+ --out2 ${prefix}_2.fastp.fastq.gz \\ -+ --json ${prefix}.fastp.json \\ -+ --html ${prefix}.fastp.html \\ -+ $fail_fastq \\ -+ $merge_fastq \\ -+ --thread $task.cpus \\ -+ $args \\ -+ 2> ${prefix}.fastp.log - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") -@@ -101,3 +198,4 @@ - """ - } - } -+ - -************************************************************ diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf deleted file mode 100644 index 6e9f2e0..0000000 --- a/modules/nf-core/fastp/main.nf +++ /dev/null @@ -1,201 +0,0 @@ -process FASTP { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::fastp=0.23.2' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastp:0.23.2--h79da9fb_0' : - 'quay.io/biocontainers/fastp:0.23.2--h79da9fb_0' }" - - input: - tuple val(meta), path(reads) - tuple val(meta2), path(uniq_full_fasta) - val save_trimmed_fail - val save_merged - val denovo_construction - - output: - tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads - tuple val(meta), path('*.json') , emit: json - tuple val(meta), path('*.html') , emit: html - tuple val(meta), path('*.log') , emit: log - path "versions.yml" , emit: versions - tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail - tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged - tuple val(meta), path('*.uniq.fasta') , optional:true, emit: fasta - tuple val(meta), path('*.totaluniqseq') , optional:true, emit: totaluniqseq - //stdout emit: tosystem - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? 
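
One step of the de novo branches in this module deserves unpacking: after trimming, reads are converted back to a FASTA of unique sequences that seeds the clustering step. A minimal sketch with a hypothetical sample `ind01`; the awk program keeps lines 1 and 2 of every four-line FASTQ record, turning `@name` into `>name`:

```bash
gunzip ind01.fastp.fastq.gz
awk 'BEGIN{P=1}{if(P==1||P==2){gsub(/^[@]/,">");print}; if(P==4)P=0; P++}' ind01.fastp.fastq |
    paste - - |       # join header and sequence onto one tab-separated line
    sort -k1,1 -V |   # natural sort by read name
    tr "\t" "\n" > ind01.uniq.fasta           # back to two-line FASTA records
awk '!/>/' ind01.uniq.fasta > ind01.totaluniqseq  # bare sequences only
```
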
"--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' - def umi_barcodes = "${meta.umi_barcodes}" ?: '' - // Added soft-links to original fastqs for consistent naming in MultiQC - // Use single ended for interleaved. Add --interleaved_in in config. - if ( task.ext.args?.contains('--interleaved_in') ) { - """ - [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz - fastp \\ - --stdout \\ - --in1 ${prefix}.fastq.gz \\ - --thread $task.cpus \\ - --json ${prefix}.fastp.json \\ - --html ${prefix}.fastp.html \\ - $fail_fastq \\ - $args \\ - 2> ${prefix}.fastp.log \\ - | gzip -c > ${prefix}.fastp.fastq.gz - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") - END_VERSIONS - """ - } else if (denovo_construction && meta.single_end) { - """ - MaxLen="\$(awk '!/>/' ${uniq_full_fasta} | \\ - awk '(NR==1||length fastp.log - - # Fastq back to Fasta - gunzip ${prefix}.fastp.fastq.gz - awk 'BEGIN{P=1}{if(P==1||P==2){gsub(/^[@]/,">");print}; if(P==4)P=0; P++}' ${prefix}.fastp.fastq | \\ - paste - - | \\ - sort -k1,1 -V | \\ - tr "\\t" "\\n" > ${prefix}.uniq.fasta - - awk '!/>/' ${prefix}.uniq.fasta > ${prefix}.totaluniqseq - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") - BusyBox: \$(busybox | sed -n -E 's/.*v([[:digit:].]+)\\s\\(.*/\\1/p') - END_VERSIONS - """ - } else if (denovo_construction && !meta.single_end) { - """ - MaxLen="\$(awk '!/>/' ${uniq_full_fasta} | \\ - awk '(NR==1||length fastp.log - - # Fastq back to Fasta - gunzip ${prefix}.fastp.fastq.gz - awk 'BEGIN{P=1}{if(P==1||P==2){gsub(/^[@]/,">");print}; if(P==4)P=0; P++}' ${prefix}.fastp.fastq | \\ - paste - - | \\ - sort -k1,1 -V | \\ - tr "\\t" "\\n" > ${prefix}.uniq.fasta - - awk '!/>/' ${prefix}.uniq.fasta > ${prefix}.totaluniqseq - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") - BusyBox: \$(busybox | sed -n -E 's/.*v([[:digit:].]+)\\s\\(.*/\\1/p') - END_VERSIONS - """ - } else if (meta.single_end) { - """ - [ ! -f ${prefix}.fastq.gz ] && ln -sf ${reads} ${prefix}.fastq.gz - fastp \\ - --stdout \\ - --in1 ${prefix}.fastq.gz \\ - --out1 ${prefix}.fastp.fastq.gz \\ - --thread $task.cpus \\ - --json ${prefix}.fastp.json \\ - --html ${prefix}.fastp.html \\ - $fail_fastq \\ - $args \\ - 2> ${prefix}.fastp.log - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") - END_VERSIONS - """ - } else if (!meta.single_end && meta.umi_barcodes) { - def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' - def umi_args = task.ext.umi_args ?: '' - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz - fastp \\ - --in1 ${prefix}_1.fastq.gz \\ - --in2 ${prefix}_2.fastq.gz \\ - --out1 ${prefix}_1.fastp.fastq.gz \\ - --out2 ${prefix}_2.fastp.fastq.gz \\ - --json ${prefix}.fastp.json \\ - --html ${prefix}.fastp.html \\ - $fail_fastq \\ - $merge_fastq \\ - --thread $task.cpus \\ - $args \\ - $umi_args \\ - 2> ${prefix}.fastp.log - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") - END_VERSIONS - """ - } else if (meta.single_end && meta.umi_barcodes) { - def umi_args = task.ext.umi_args ?: '' - """ - [ ! 
-f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz - fastp \\ - --stdout \\ - --in1 ${prefix}.fastq.gz \\ - --out1 ${prefix}.fastp.fastq.gz \\ - --thread $task.cpus \\ - --json ${prefix}.fastp.json \\ - --html ${prefix}.fastp.html \\ - $fail_fastq \\ - $umi_args \\ - $args \\ - 2> ${prefix}.fastp.log - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") - END_VERSIONS - """ - } else { - def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz - fastp \\ - --in1 ${prefix}_1.fastq.gz \\ - --in2 ${prefix}_2.fastq.gz \\ - --out1 ${prefix}_1.fastp.fastq.gz \\ - --out2 ${prefix}_2.fastp.fastq.gz \\ - --json ${prefix}.fastp.json \\ - --html ${prefix}.fastp.html \\ - $fail_fastq \\ - $merge_fastq \\ - --thread $task.cpus \\ - $args \\ - 2> ${prefix}.fastp.log - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") - END_VERSIONS - """ - } -} - diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml deleted file mode 100644 index 6f6fad7..0000000 --- a/modules/nf-core/fastp/meta.yml +++ /dev/null @@ -1,73 +0,0 @@ -name: fastp -description: Perform adapter/quality trimming on sequencing reads -keywords: - - trimming - - quality control - - fastq -tools: - - fastp: - description: | - A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. - documentation: https://github.com/OpenGene/fastp - doi: https://doi.org/10.1093/bioinformatics/bty560 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. If you wish to run interleaved paired-end data, supply as single-end data - but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. - - adapter_fasta: - type: file - description: File in FASTA format containing possible adapters to remove. - pattern: "*.{fasta,fna,fas,fa}" - - save_trimmed_fail: - type: boolean - description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` - - save_merged: - type: boolean - description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz` - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - reads: - type: file - description: The trimmed/modified/unmerged fastq reads - pattern: "*fastp.fastq.gz" - - json: - type: file - description: Results in JSON format - pattern: "*.json" - - html: - type: file - description: Results in HTML format - pattern: "*.html" - - log: - type: file - description: fastq log file - pattern: "*.log" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - reads_fail: - type: file - description: Reads the failed the preprocessing - pattern: "*fail.fastq.gz" - - reads_merged: - type: file - description: Reads that were successfully merged - pattern: "*.{merged.fastq.gz}" -authors: - - "@drpatelh" - - "@kevinmenden" diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 0000000..691d4c7 --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 9ae5838..752c3a1 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,10 +2,10 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda "bioconda::fastqc=0.11.9" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) @@ -24,16 +24,29 @@ process FASTQC { // Make list of old name and new name pairs to use for renaming in the bash while loop def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } def rename_to = old_new_pairs*.join(' ').join(' ') - def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + def renamed_files = old_new_pairs.collect{ _old_name, new_name -> new_name }.join(' ') + + // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) + // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 + // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') / task.cpus + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 
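
The memory logic introduced in this hunk divides the task's memory across its threads and clamps the result to FastQC's accepted `--memory` range; in shell terms, for a hypothetical 8 GB / 4 CPU task:

```bash
memory_in_mb=$(( 8192 / 4 ))                        # task.memory (MB) / task.cpus
fastqc_memory=$memory_in_mb
(( fastqc_memory > 10000 )) && fastqc_memory=10000  # FastQC upper bound
(( fastqc_memory < 100 ))   && fastqc_memory=100    # FastQC lower bound
echo "fastqc --threads 4 --memory $fastqc_memory"   # here: --memory 2048
```
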
100 : memory_in_mb) + """ printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name done - fastqc $args --threads $task.cpus $renamed_files + + fastqc \\ + $args \\ + --threads $task.cpus \\ + --memory $fastqc_memory \\ + $renamed_files cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -45,7 +58,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 4da5bb5..2b2e62b 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -11,42 +11,57 @@ tools: FastQC gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other overrepresented sequences. homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ licence: ["GPL-2.0-only"] + identifier: biotools:fastqc input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: FastQC report + pattern: "*_{fastqc.html}" - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.zip": + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test new file mode 100644 index 0000000..e9d79a0 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -0,0 +1,309 @@ +nextflow_process { + + name "Test Process FASTQC" + script "../main.nf" + process "FASTQC" + + tag "modules" + tag "modules_nfcore" + tag "fastqc" + + test("sarscov2 single-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
<div id="header_filename">Mon 2 Oct 2023 <br/>
test.gz
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert 
process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 interleaved [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [bam] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 multiple [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
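
To iterate on these cases without running the whole suite, nf-test can be pointed at just this file (the profile name is an assumption about your local setup):

```bash
nf-test test modules/nf-core/fastqc/tests/main.nf.test --profile docker
```
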
'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 custom_prefix - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 0000000..d5db309 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,392 @@ +{ + "sarscov2 custom_prefix": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:16.374038" + }, + "sarscov2 single-end [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:24.993809" + }, + "sarscov2 custom_prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:03:10.93942" + }, + "sarscov2 interleaved [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:42.355718" + }, + "sarscov2 paired-end [bam]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:53.276274" + }, + "sarscov2 multiple [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:05.527626" + }, + "sarscov2 paired-end [fastq]": { 
+ "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:31.188871" + }, + "sarscov2 paired-end [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:34.273566" + }, + "sarscov2 multiple [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:03:02.304411" + }, + "sarscov2 single-end [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:19.095607" + }, + "sarscov2 interleaved [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:44.640184" + }, + "sarscov2 paired-end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + 
"nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:53.550742" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 0000000..7834294 --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/freebayes/freebayes.diff b/modules/nf-core/freebayes/freebayes.diff deleted file mode 100644 index 80a22bd..0000000 --- a/modules/nf-core/freebayes/freebayes.diff +++ /dev/null @@ -1,56 +0,0 @@ -Changes in module 'nf-core/freebayes' ---- modules/nf-core/freebayes/main.nf -+++ modules/nf-core/freebayes/main.nf -@@ -1,14 +1,15 @@ - process FREEBAYES { - tag "$meta.id" -- label 'process_single' -+ label 'process_medium' - -- conda "bioconda::freebayes=1.3.6" -+ conda (params.enable_conda ? "bioconda::freebayes=1.3.5" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? -- 'https://depot.galaxyproject.org/singularity/freebayes:1.3.6--hbfe0e7f_2' : -- 'quay.io/biocontainers/freebayes:1.3.6--hbfe0e7f_2' }" -+ 'https://depot.galaxyproject.org/singularity/freebayes:1.3.5--py38ha193a2f_3' : -+ 'quay.io/biocontainers/freebayes:1.3.5--py38ha193a2f_3' }" - - input: -- tuple val(meta), path(input_1), path(input_1_index), path(input_2), path(input_2_index), path(target_bed) -+ tuple val(meta), path(bam), path(bai), path(target_bed) -+ val targets_file - path fasta - path fasta_fai - path samples -@@ -25,12 +26,11 @@ - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" -- def input = input_2 ? "${input_1} ${input_2}" : "${input_1}" - def targets_file = target_bed ? "--target ${target_bed}" : "" - def samples_file = samples ? "--samples ${samples}" : "" - def populations_file = populations ? "--populations ${populations}" : "" - def cnv_file = cnv ? "--cnv-map ${cnv}" : "" -- -+ def interval = meta.interval ? '_' + meta.interval : '' - """ - freebayes \\ - -f $fasta \\ -@@ -39,13 +39,11 @@ - $populations_file \\ - $cnv_file \\ - $args \\ -- $input > ${prefix}.vcf -- -- bgzip ${prefix}.vcf -- -+ $bam | \\ -+ bgzip -c > ${prefix}${interval}.vcf.gz - cat <<-END_VERSIONS > versions.yml - "${task.process}": - freebayes: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' ) - END_VERSIONS - """ --} -+} -************************************************************ diff --git a/modules/nf-core/freebayes/main.nf b/modules/nf-core/freebayes/main.nf deleted file mode 100644 index 3310d97..0000000 --- a/modules/nf-core/freebayes/main.nf +++ /dev/null @@ -1,49 +0,0 @@ -process FREEBAYES { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::freebayes=1.3.5" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/freebayes:1.3.5--py38ha193a2f_3' : - 'quay.io/biocontainers/freebayes:1.3.5--py38ha193a2f_3' }" - - input: - tuple val(meta), path(bam), path(bai), path(target_bed) - val targets_file - path fasta - path fasta_fai - path samples - path populations - path cnv - - output: - tuple val(meta), path("*.vcf.gz"), emit: vcf - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def targets_file = target_bed ? 
"--target ${target_bed}" : "" - def samples_file = samples ? "--samples ${samples}" : "" - def populations_file = populations ? "--populations ${populations}" : "" - def cnv_file = cnv ? "--cnv-map ${cnv}" : "" - def interval = meta.interval ? '_' + meta.interval : '' - """ - freebayes \\ - -f $fasta \\ - $targets_file \\ - $samples_file \\ - $populations_file \\ - $cnv_file \\ - $args \\ - $bam | \\ - bgzip -c > ${prefix}${interval}.vcf.gz - cat <<-END_VERSIONS > versions.yml - "${task.process}": - freebayes: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' ) - END_VERSIONS - """ -} \ No newline at end of file diff --git a/modules/nf-core/freebayes/meta.yml b/modules/nf-core/freebayes/meta.yml deleted file mode 100644 index cbbd297..0000000 --- a/modules/nf-core/freebayes/meta.yml +++ /dev/null @@ -1,82 +0,0 @@ -name: freebayes -description: A haplotype-based variant detector -keywords: - - variant caller - - SNP - - genotyping - - somatic variant calling - - germline variant calling - - bacterial variant calling - - bayesian - -tools: - - freebayes: - description: Bayesian haplotype-based polymorphism discovery and genotyping - homepage: https://github.com/freebayes/freebayes - documentation: https://github.com/freebayes/freebayes - tool_dev_url: https://github.com/freebayes/freebayes - doi: "arXiv:1207.3907" - licence: ["MIT"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - input_index: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai}" - - target_bed: - type: file - description: Optional - Limit analysis to targets listed in this BED-format FILE. - pattern: "*.bed" - - fasta: - type: file - description: reference fasta file - pattern: ".{fa,fa.gz,fasta,fasta.gz}" - - fasta_fai: - type: file - description: reference fasta file index - pattern: "*.{fa,fasta}.fai" - - samples: - type: file - description: Optional - Limit analysis to samples listed (one per line) in the FILE. - pattern: "*.txt" - - populations: - type: file - description: Optional - Each line of FILE should list a sample and a population which it is part of. - pattern: "*.txt" - - cnv: - type: file - description: | - A copy number map BED file, which has either a sample-level ploidy: - sample_name copy_number - or a region-specific format: - seq_name start end sample_name copy_number - pattern: "*.bed" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - vcf: - type: file - description: Compressed VCF file - pattern: "*.vcf.gz" - -authors: - - "@maxibor" - - "@FriederikeHanssen" - - "@maxulysse" diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf deleted file mode 100644 index 430dbab..0000000 --- a/modules/nf-core/minimap2/align/main.nf +++ /dev/null @@ -1,48 +0,0 @@ -process MINIMAP2_ALIGN { - tag "$meta.id" - label 'process_medium' - - // Note: the versions here need to match the versions used in the mulled container below and minimap2/index - conda "bioconda::minimap2=2.24 bioconda::samtools=1.14" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' : - 'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }" - - input: - tuple val(meta), path(reads) - path reference - val bam_format - val cigar_paf_format - val cigar_bam - - output: - tuple val(meta), path("*.paf"), optional: true, emit: paf - tuple val(meta), path("*.bam"), optional: true, emit: bam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def bam_output = bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf" - def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' - def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' - """ - minimap2 \\ - $args \\ - -t $task.cpus \\ - "${reference ?: reads}" \\ - "$reads" \\ - $cigar_paf \\ - $set_cigar_bam \\ - $bam_output - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - minimap2: \$(minimap2 --version 2>&1) - END_VERSIONS - """ -} diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml deleted file mode 100644 index 991b39a..0000000 --- a/modules/nf-core/minimap2/align/meta.yml +++ /dev/null @@ -1,65 +0,0 @@ -name: minimap2_align -description: A versatile pairwise aligner for genomic and spliced nucleotide sequences -keywords: - - align - - fasta - - fastq - - genome - - paf - - reference -tools: - - minimap2: - description: | - A versatile pairwise aligner for genomic and spliced nucleotide sequences. - homepage: https://github.com/lh3/minimap2 - documentation: https://github.com/lh3/minimap2#uguide - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FASTA or FASTQ files of size 1 and 2 for single-end - and paired-end data, respectively. - - reference: - type: file - description: | - Reference database in FASTA format. - - bam_format: - type: boolean - description: Specify that output should be in BAM format - - cigar_paf_format: - type: boolean - description: Specify that output CIGAR should be in PAF format - - cigar_bam: - type: boolean - description: | - Write CIGAR with >65535 ops at the CG tag. This is recommended when - doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - paf: - type: file - description: Alignment in PAF format - pattern: "*.paf" - - bam: - type: file - description: Alignment in BAM format - pattern: "*.bam" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@heuermh" - - "@sofstam" - - "@sateeshperi" - - "@jfy133" diff --git a/modules/nf-core/minimap2/index/main.nf b/modules/nf-core/minimap2/index/main.nf deleted file mode 100644 index 73dd4ee..0000000 --- a/modules/nf-core/minimap2/index/main.nf +++ /dev/null @@ -1,34 +0,0 @@ -process MINIMAP2_INDEX { - label 'process_medium' - - // Note: the versions here need to match the versions used in minimap2/align - conda "bioconda::minimap2=2.24" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/minimap2:2.24--h7132678_1' : - 'quay.io/biocontainers/minimap2:2.24--h7132678_1' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path("*.mmi"), emit: index - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - minimap2 \\ - -t $task.cpus \\ - -d ${fasta.baseName}.mmi \\ - $args \\ - $fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - minimap2: \$(minimap2 --version 2>&1) - END_VERSIONS - """ -} diff --git a/modules/nf-core/minimap2/index/meta.yml b/modules/nf-core/minimap2/index/meta.yml deleted file mode 100644 index b58f35c..0000000 --- a/modules/nf-core/minimap2/index/meta.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: minimap2_index -description: Provides fasta index required by minimap2 alignment. -keywords: - - index - - fasta - - reference -tools: - - minimap2: - description: | - A versatile pairwise aligner for genomic and spliced nucleotide sequences. - homepage: https://github.com/lh3/minimap2 - documentation: https://github.com/lh3/minimap2#uguide - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: | - Reference database in FASTA format. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - index: - type: file - description: Minimap2 fasta index. - pattern: "*.mmi" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@yuukiiwa" - - "@drpatelh" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 0000000..6f5b867 --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::multiqc=1.25.1 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 4b60474..cc0643e 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,16 +1,18 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' : + 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" path(multiqc_config) path(extra_multiqc_config) path(multiqc_logo) + path(replace_names) + path(sample_names) output: path "*multiqc_report.html", emit: report @@ -23,14 +25,22 @@ process MULTIQC { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' + def replace = replace_names ? "--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' """ multiqc \\ --force \\ $args \\ $config \\ + $prefix \\ $extra_config \\ + $logo \\ + $replace \\ + $samples \\ . cat <<-END_VERSIONS > versions.yml @@ -41,8 +51,8 @@ process MULTIQC { stub: """ - touch multiqc_data - touch multiqc_plots + mkdir multiqc_data + mkdir multiqc_plots touch multiqc_report.html cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index ebc29b2..b16c187 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,6 @@ -name: MultiQC -description: Aggregate results from bioinformatics analyses across many samples into a single report +name: multiqc +description: Aggregate results from bioinformatics analyses across many samples into + a single report keywords: - QC - bioinformatics tools @@ -12,44 +13,66 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - + identifier: biotools:multiqc input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. - pattern: "*.{yml,yaml}" - - multiqc_logo: - type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" - + - - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + - - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + - - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + - - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. 
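The two new optional inputs introduced above (`replace_names` and `sample_names`) map to MultiQC's `--replace-names` and `--sample-names` flags in the script block. A minimal sketch of wiring the updated module from a workflow, passing `[]` for optional inputs that are unused — the channel contents and the `assets/rename.tsv` path are illustrative, not part of the module:

```groovy
// Hypothetical wiring of the updated MULTIQC module; all paths are examples.
include { MULTIQC } from './modules/nf-core/multiqc/main'

workflow {
    // Collect QC reports (e.g. FastQC zips) into a single staged file list
    ch_multiqc_files = Channel.fromPath('results/fastqc/*_fastqc.zip').collect()

    MULTIQC (
        ch_multiqc_files,
        [],                        // multiqc_config       (optional)
        [],                        // extra_multiqc_config (optional)
        [],                        // multiqc_logo         (optional)
        file('assets/rename.tsv'), // replace_names -> --replace-names (hypothetical file)
        []                         // sample_names  -> --sample-names
    )
}
```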
+ pattern: "*.{tsv}" output: - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" + - "*multiqc_report.html": + type: file + description: MultiQC report file + pattern: "multiqc_report.html" - data: - type: dir - description: MultiQC data dir - pattern: "multiqc_data" + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" + - "*_plots": + type: file + description: Plots created by MultiQC + pattern: "*_data" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@abhi18av" - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 0000000..33316a7 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,92 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + config "./nextflow.config" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 0000000..2fcbb5f --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ + [ + 
"versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-02T17:51:46.317523" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-02T17:52:20.680978" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-02T17:52:09.185842" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/nextflow.config b/modules/nf-core/multiqc/tests/nextflow.config new file mode 100644 index 0000000..c537a6a --- /dev/null +++ b/modules/nf-core/multiqc/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'MULTIQC' { + ext.prefix = null + } +} diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 0000000..bea6c0d --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf deleted file mode 100644 index ce6580d..0000000 --- a/modules/nf-core/samtools/faidx/main.nf +++ /dev/null @@ -1,44 +0,0 @@ -process SAMTOOLS_FAIDX { - tag "$fasta" - label 'process_single' - - conda "bioconda::samtools=1.16.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path ("*.fai"), emit: fai - tuple val(meta), path ("*.gzi"), emit: gzi, optional: true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - samtools \\ - faidx \\ - $args \\ - $fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ - - stub: - """ - touch ${fasta}.fai - cat <<-END_VERSIONS > versions.yml - - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml deleted file mode 100644 index fe2fe9a..0000000 --- a/modules/nf-core/samtools/faidx/meta.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: samtools_faidx -description: Index FASTA file -keywords: - - index - - fasta -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: http://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - fasta: - type: file - description: FASTA file - pattern: "*.{fa,fasta}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fai: - type: file - description: FASTA index file - pattern: "*.{fai}" - - gzi: - type: file - description: Optional gzip index file for compressed inputs - pattern: "*.gzi" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@ewels" - - "@phue" diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf deleted file mode 100644 index 2120cd7..0000000 --- a/modules/nf-core/samtools/flagstat/main.nf +++ /dev/null @@ -1,35 +0,0 @@ -process SAMTOOLS_FLAGSTAT { - tag "$meta.id" - label 'process_single' - - conda "bioconda::samtools=1.16.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" - - input: - tuple val(meta), path(bam), path(bai) - - output: - tuple val(meta), path("*.flagstat"), emit: flagstat - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - samtools \\ - flagstat \\ - --threads ${task.cpus} \\ - $bam \\ - > ${prefix}.flagstat - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml deleted file mode 100644 index 9526906..0000000 --- a/modules/nf-core/samtools/flagstat/meta.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: samtools_flagstat -description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type -keywords: - - stats - - mapping - - counts - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - bai: - type: file - description: Index for BAM/CRAM/SAM file - pattern: "*.{bai,crai,sai}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - flagstat: - type: file - description: File containing samtools flagstat output - pattern: "*.{flagstat}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf deleted file mode 100644 index a7b87d8..0000000 --- a/modules/nf-core/samtools/idxstats/main.nf +++ /dev/null @@ -1,36 +0,0 @@ -process SAMTOOLS_IDXSTATS { - tag "$meta.id" - label 'process_single' - - conda "bioconda::samtools=1.16.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" - - input: - tuple val(meta), path(bam), path(bai) - - output: - tuple val(meta), path("*.idxstats"), emit: idxstats - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - """ - samtools \\ - idxstats \\ - --threads ${task.cpus-1} \\ - $bam \\ - > ${prefix}.idxstats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml deleted file mode 100644 index 3710ab8..0000000 --- a/modules/nf-core/samtools/idxstats/meta.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: samtools_idxstats -description: Reports alignment summary statistics for a BAM/CRAM/SAM file -keywords: - - stats - - mapping - - counts - - chromosome - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - bai: - type: file - description: Index for BAM/CRAM/SAM file - pattern: "*.{bai,crai,sai}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - idxstats: - type: file - description: File containing samtools idxstats output - pattern: "*.{idxstats}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf deleted file mode 100644 index 8b95687..0000000 --- a/modules/nf-core/samtools/index/main.nf +++ /dev/null @@ -1,48 +0,0 @@ -process SAMTOOLS_INDEX { - tag "$meta.id" - label 'process_low' - - conda "bioconda::samtools=1.16.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" - - input: - tuple val(meta), path(input) - - output: - tuple val(meta), path("*.bai") , optional:true, emit: bai - tuple val(meta), path("*.csi") , optional:true, emit: csi - tuple val(meta), path("*.crai"), optional:true, emit: crai - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - samtools \\ - index \\ - -@ ${task.cpus-1} \\ - $args \\ - $input - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ - - stub: - """ - touch ${input}.bai - touch ${input}.crai - touch ${input}.csi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml deleted file mode 100644 index e5cadbc..0000000 --- a/modules/nf-core/samtools/index/meta.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: samtools_index -description: Index SAM/BAM/CRAM file -keywords: - - index - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bai: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - crai: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - csi: - type: file - description: CSI index file - pattern: "*.{csi}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@ewels" - - "@maxulysse" diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf deleted file mode 100644 index a80ff3a..0000000 --- a/modules/nf-core/samtools/merge/main.nf +++ /dev/null @@ -1,56 +0,0 @@ -process SAMTOOLS_MERGE { - tag "$meta.id" - label 'process_low' - - conda "bioconda::samtools=1.16.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" - - input: - tuple val(meta), path(input_files, stageAs: "?/*") - path fasta - path fai - - output: - tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam - tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram - tuple val(meta), path("*.csi") , optional:true, emit: csi - path "versions.yml" , emit: versions - - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() - def reference = fasta ? "--reference ${fasta}" : "" - """ - samtools \\ - merge \\ - --threads ${task.cpus-1} \\ - $args \\ - ${reference} \\ - ${prefix}.${file_type} \\ - $input_files - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ - - stub: - prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() - """ - touch ${prefix}.${file_type} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml deleted file mode 100644 index 5bd84bc..0000000 --- a/modules/nf-core/samtools/merge/meta.yml +++ /dev/null @@ -1,62 +0,0 @@ -name: samtools_merge -description: Merge BAM or CRAM file -keywords: - - merge - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input_files: - type: file - description: BAM/CRAM file - pattern: "*.{bam,cram,sam}" - - fasta: - type: optional file - description: Reference file the CRAM was created with - pattern: "*.{fasta,fa}" - - fai: - type: optional file - description: Index of the reference file the CRAM was created with - pattern: "*.fai" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - bam: - type: file - description: BAM file - pattern: "*.{bam}" - - cram: - type: file - description: CRAM file - pattern: "*.{cram}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - csi: - type: file - description: BAM index file (optional) - pattern: "*.csi" -authors: - - "@drpatelh" - - "@yuukiiwa " - - "@maxulysse" - - "@FriederikeHanssen" diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf deleted file mode 100644 index 0a2a364..0000000 --- a/modules/nf-core/samtools/stats/main.nf +++ /dev/null @@ -1,49 +0,0 @@ -process SAMTOOLS_STATS { - tag "$meta.id" - label 'process_single' - - conda "bioconda::samtools=1.16.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" - - input: - tuple val(meta), path(input), path(input_index) - path fasta - - output: - tuple val(meta), path("*.stats"), emit: stats - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def reference = fasta ? "--reference ${fasta}" : "" - """ - samtools \\ - stats \\ - --threads ${task.cpus} \\ - ${reference} \\ - ${input} \\ - > ${prefix}.stats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.stats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml deleted file mode 100644 index cac50b1..0000000 --- a/modules/nf-core/samtools/stats/meta.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: samtools_stats -description: Produces comprehensive statistics from SAM/BAM/CRAM file -keywords: - - statistics - - counts - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM file from alignment - pattern: "*.{bam,cram}" - - input_index: - type: file - description: BAI/CRAI file from alignment - pattern: "*.{bai,crai}" - - fasta: - type: optional file - description: Reference file the CRAM was created with - pattern: "*.{fasta,fa}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - stats: - type: file - description: File containing samtools stats output - pattern: "*.{stats}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@FriederikeHanssen" diff --git a/modules/nf-core/seqtk/seq/main.nf b/modules/nf-core/seqtk/seq/main.nf deleted file mode 100644 index c1a2653..0000000 --- a/modules/nf-core/seqtk/seq/main.nf +++ /dev/null @@ -1,40 +0,0 @@ -process SEQTK_SEQ { - tag "$meta.id" - label 'process_single' - - conda "bioconda::seqtk=1.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/seqtk:1.3--h5bf99c6_3' : - 'quay.io/biocontainers/seqtk:1.3--h5bf99c6_3' }" - - input: - tuple val(meta), path(fastx) - - output: - tuple val(meta), path("*.gz") , emit: fastx - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - def extension = "fastq" - if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz/ || "$args" ==~ /\-[aA]/ ) { - extension = "fasta" - } - """ - seqtk \\ - seq \\ - $args \\ - $fastx | \\ - gzip -c > ${prefix}.seqtk-seq.${extension}.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/seqtk/seq/meta.yml b/modules/nf-core/seqtk/seq/meta.yml deleted file mode 100644 index 3986257..0000000 --- a/modules/nf-core/seqtk/seq/meta.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: seqtk_seq -description: Common transformation operations on FASTA or FASTQ files. -keywords: - - seq -tools: - - seqtk: - description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format. The seqtk seq command enables common transformation operations on FASTA or FASTQ files. - homepage: https://github.com/lh3/seqtk - documentation: https://docs.csc.fi/apps/seqtk/ - tool_dev_url: https://github.com/lh3/seqtk - licence: ["MIT"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - sequences: - type: file - description: A FASTQ or FASTA file - pattern: "*.{fastq.gz, fastq, fq, fq.gz, fasta, fastq.gz, fa, fa.gz, fas, fas.gz, fna, fna.gz}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - sequences: - type: file - description: FASTQ/FASTA file containing renamed sequences - pattern: "*.{fastq.gz, fasta.gz}" - -authors: - - "@hseabolt" - - "@mjcipriano" - - "@sateeshperi" diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf deleted file mode 100644 index 9a404db..0000000 --- a/modules/nf-core/tabix/tabix/main.nf +++ /dev/null @@ -1,42 +0,0 @@ -process TABIX_TABIX { - tag "$meta.id" - label 'process_single' - - conda "bioconda::tabix=1.11" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : - 'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }" - - input: - tuple val(meta), path(tab) - - output: - tuple val(meta), path("*.tbi"), optional:true, emit: tbi - tuple val(meta), path("*.csi"), optional:true, emit: csi - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - tabix $args $tab - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${tab}.tbi - cat <<-END_VERSIONS > versions.yml - - "${task.process}": - tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml deleted file mode 100644 index fcc6e52..0000000 --- a/modules/nf-core/tabix/tabix/meta.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: tabix_tabix -description: create tabix index from a sorted bgzip tab-delimited genome file -keywords: - - index - - tabix - - vcf -tools: - - tabix: - description: Generic indexer for TAB-delimited genome position files. - homepage: https://www.htslib.org/doc/tabix.html - documentation: https://www.htslib.org/doc/tabix.1.html - doi: 10.1093/bioinformatics/btq671 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - tab: - type: file - description: TAB-delimited genome position file compressed with bgzip - pattern: "*.{bed.gz,gff.gz,sam.gz,vcf.gz}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - tbi: - type: file - description: tabix index file - pattern: "*.{tbi}" - - csi: - type: file - description: coordinate sorted index file - pattern: "*.{csi}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@joseespinosa" - - "@drpatelh" - - "@maxulysse" diff --git a/modules/nf-core/umitools/dedup/main.nf b/modules/nf-core/umitools/dedup/main.nf deleted file mode 100644 index 90de78d..0000000 --- a/modules/nf-core/umitools/dedup/main.nf +++ /dev/null @@ -1,45 +0,0 @@ -process UMITOOLS_DEDUP { - tag "$meta.id" - label "process_medium" - - conda "bioconda::umi_tools=1.1.2" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' : - 'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }" - - input: - tuple val(meta), path(bam), path(bai) - val get_output_stats - - output: - tuple val(meta), path("*.bam") , emit: bam - tuple val(meta), path("*edit_distance.tsv"), optional:true, emit: tsv_edit_distance - tuple val(meta), path("*per_umi.tsv") , optional:true, emit: tsv_per_umi - tuple val(meta), path("*per_position.tsv") , optional:true, emit: tsv_umi_per_position - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def paired = meta.single_end ? "" : "--paired" - def stats = get_output_stats ? 
"--output-stats $prefix" : "" - - if (!(args ==~ /.*--random-seed.*/)) {args += " --random-seed=100"} - """ - PYTHONHASHSEED=0 umi_tools \\ - dedup \\ - -I $bam \\ - -S ${prefix}_dedup.bam \\ - $stats \\ - $paired \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/umitools/dedup/meta.yml b/modules/nf-core/umitools/dedup/meta.yml deleted file mode 100644 index 56888e5..0000000 --- a/modules/nf-core/umitools/dedup/meta.yml +++ /dev/null @@ -1,63 +0,0 @@ -name: umitools_dedup -description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read. -keywords: - - umitools - - deduplication -tools: - - umi_tools: - description: > - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) - and single cell RNA-Seq cell barcodes - documentation: https://umi-tools.readthedocs.io/en/latest/ - license: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: | - BAM file containing reads to be deduplicated via UMIs. - pattern: "*.{bam}" - - bai: - type: file - description: | - BAM index files corresponding to the input BAM file. - pattern: "*.{bai}" - - get_output_stats: - type: boolean - description: | - Whether or not to generate output stats. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM file with deduplicated UMIs. - pattern: "*.{bam}" - - tsv_edit_distance: - type: file - description: Reports the (binned) average edit distance between the UMIs at each position. - pattern: "*edit_distance.tsv" - - tsv_per_umi: - type: file - description: UMI-level summary statistics. - pattern: "*per_umi.tsv" - - tsv_umi_per_position: - type: file - description: Tabulates the counts for unique combinations of UMI and position. 
- pattern: "*per_position.tsv" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@drpatelh" - - "@grst" - - "@klkeys" diff --git a/modules/nf-core/umitools/dedup/umitools-dedup.diff b/modules/nf-core/umitools/dedup/umitools-dedup.diff deleted file mode 100644 index d4ce396..0000000 --- a/modules/nf-core/umitools/dedup/umitools-dedup.diff +++ /dev/null @@ -1,14 +0,0 @@ -Changes in module 'nf-core/umitools/dedup' ---- modules/nf-core/umitools/dedup/main.nf -+++ modules/nf-core/umitools/dedup/main.nf -@@ -32,7 +32,7 @@ - PYTHONHASHSEED=0 umi_tools \\ - dedup \\ - -I $bam \\ -- -S ${prefix}.bam \\ -+ -S ${prefix}_dedup.bam \\ - $stats \\ - $paired \\ - $args - -************************************************************ diff --git a/nextflow.config b/nextflow.config index 94a97b9..94caaae 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,7 +1,7 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nf-core/radseq Nextflow config file -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Default config options for all compute environments ---------------------------------------------------------------------------------------- */ @@ -9,194 +9,182 @@ // Global default params, used in configs params { + // TODO nf-core: Specify your pipeline's command line flags // Input options - input = "${baseDir}/data/input.csv" - popmap = "${baseDir}/data/popmap.txt" - - // Workflow options - method = 'denovo' // e.g.'denovo' or 'reference' - - // Type of RADseq - sequence_type = 'PE' // e.g. 
'SE', 'PE', 'RPE', 'ROL', and 'OL' + input = null - // Reference options - genome = '/mnt/d/nextflow_testing/Grayling/data/CM014990.1.fna' - igenomes_base = 's3://ngi-igenomes/igenomes' - igenomes_ignore = false - - // Trimming options - cut_right = true - window_size = 25 - mean_min_quality = 20 - pairedend_bp_corr = true - overlap_dif_limit = 1 - clip_r1 = 6 - clip_r2 = 3 - trim_polyg = true - dont_eval_duplicates = false - umi_read_structure = '--umi --umi_loc=read2 --umi_len=8 --umi_skip=2 --umi_prefix=UMI' - - // Denovo options - minreaddepth_withinindividual = null // defaults to 2 - minreaddepth_betweenindividual = null // defaults to 2 - - // cdhit options - cluster_algorithm = 1 // slow but accurate algorithm (0 or 1) - description_length = 100 - sequence_simularity = '.9' // may need to change depending on taxa - - // rainbow div options - similarity_fraction = 0.5 - max_variants = 10 - - // rainbow merge options - min_reads = 2 - max_clusters_for_merge = 10000 - max_reads_for_assembly = 10000 - min_overlap = 20 - min_similarity_fraction = 0.75 - - // Alignment options - aligner = 'bwa' - clipping_penalty = '20,5' - output_secondary = true - mark_short_as_sec = true - min_aln_quality = 30 - matching_score = 1 - mismatch_score = 4 - gap_penalty = 6 - quality_score = 1 - - // Interval options - subset_intervals_channel = null - max_read_coverage_to_split = '500000' - - // Freebayes options - min_map_qual = 5 - min_base_qual = 5 - complex_gap = 3 - use_best_n_alleles = 1 - min_alt_fraction = 10 - min_repeat_entropy = 0.1 - - // Intermediate files - save_trimmed = true - denovo_intermediate_files = true - save_reference_indices = true - save_intervals = true - save_freebayes_intervals = true + // References + genome = null + igenomes_base = 's3://ngi-igenomes/igenomes/' + igenomes_ignore = false // MultiQC options - multiqc_config = null - multiqc_title = null - max_multiqc_email_size = '25.MB' + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options - outdir = './results' - tracedir = "${params.outdir}/pipeline_info" - publish_dir_mode = 'symlink' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - help = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'aligner_options,genomes,method_options,publish_dir_mode,method_options,publish_dir_mode,save_freebayes_intervals,save_intervals,save_reference_fai,save_trim_adapters_fastp,save_cdhit_clstr,save_seqtk_seq_fasta,save_uniq_full_fasta,save_uniqseq,save_trimmed,min_repeat_entropy,min_alt_fraction,use_best_n_alleles,complex_gap,min_base_qual,min_map_qual,max_read_coverage_to_split,subset_intervals_channel,quality_score,gap_penalty,mismatch_score,matching_score,min_aln_quality,mark_short_as_sec,output_secondary,clipping_penalty,aligner,min_similarity_fraction,min_overlap,max_reads_for_assembly,max_clusters_for_merge,min_reads,max_variants,similarity_fraction,sequence_simularity,description_length,cluster_algorithm,minreaddepth_betweenindividual,minreaddepth_withinindividual,umi_read_structure,dont_eval_duplicates,trim_polyg,clip_r2,clip_r1,overlap_dif_limit,pairedend_bp_corr,mean_min_quality,window_size,cut_right,sequence_type,method,popmap' - method_options = ' denovo, reference' - aligner_options = ' bwa, bwamem2' + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = 
null + help = false + help_full = false + show_hidden = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')// Config options + config_profile_name = null + config_profile_description = null - // Config options custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null config_profile_contact = null config_profile_url = null - config_profile_name = null - - // Max resource options - // Defaults only, expecting to be overwritten - max_memory = '2.GB' - max_cpus = 5 - max_time = '240.h' + // Schema validation default options + validate_params = true } // Load base.config by default for all pipelines includeConfig 'conf/base.config' -// Load nf-core custom profiles from different Institutions -try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") -} - profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true + } conda { - params.enable_conda = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda'] + apptainer.enabled = false + } + mamba { + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - params.enable_conda = false + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' + } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - params.enable_conda = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - params.enable_conda = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - params.enable_conda = false + shifter.enabled = true + 
conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - params.enable_conda = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' + } + gitpod { + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB + process { + resourceLimits = [ + memory: 8.GB, + cpus : 4, + time : 1.h + ] + } } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } } +// Load nf-core custom profiles from different Institutions +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" + +// Load nf-core/radseq custom profiles from different institutions. +// TODO nf-core: Optionally, you can add a pipeline-specific nf-core config at https://github.com/nf-core/configs +// includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/radseq.config" : "/dev/null" + +// Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' +charliecloud.registry = 'quay.io' + // Load igenomes.config if required -if (!params.igenomes_ignore) { - includeConfig 'conf/igenomes.config' -} else { - params.genomes = [:] -} +includeConfig !params.igenomes_ignore ? 'conf/igenomes.config' : 'conf/igenomes_ignored.config' // Export these variables to prevent local Python/R libraries from conflicting with those in the container -// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. env { @@ -206,69 +194,92 @@ env { JULIA_DEPOT_PATH = "/usr/local/share/julia" } -// Capture exit codes from upstream processes when piping -process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Set bash options +process.shell = """\ +bash + +set -e # Exit if a tool returns a non-zero status/exit code +set -u # Treat unset variables and parameters as an error +set -o pipefail # Returns the status of the last command to exit with a non-zero status or zero if all successfully execute +set -C # No clobber - prevent output redirection from overwriting files. 
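+# For example (hypothetical commands): with 'set -o pipefail', a line such as
+# 'bwa mem ref.fa reads.fq | samtools sort -o out.bam' fails the task when bwa exits non-zero,
+# instead of reporting only the exit status of samtools sort; with 'set -C', a redirection
+# such as '> aln.sam' aborts rather than overwriting an existing aln.sam.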
+""" + +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${params.trace_report_suffix}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${params.trace_report_suffix}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${params.trace_report_suffix}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" + file = "${params.outdir}/pipeline_info/pipeline_dag_${params.trace_report_suffix}.html" } manifest { name = 'nf-core/radseq' - author = 'Gabriel Barrett' + author = """Gabriel Barrett""" // The author field is deprecated from Nextflow version 24.10.0, use contributors instead + contributors = [ + // TODO nf-core: Update the field with the details of the contributors to your pipeline. New with Nextflow version 24.10.0 + [ + name: 'Gabriel Barrett', + affiliation: '', + email: '', + github: '', + contribution: [], // List of contribution types ('author', 'maintainer' or 'contributor') + orcid: '' + ], + ] homePage = 'https://github.com/nf-core/radseq' - description = 'dDocent workflow' + description = """variant calling pipeline for radseq""" mainScript = 'main.nf' - nextflowVersion = '!>=21.10.3' + defaultBranch = 'master' + nextflowVersion = '!>=24.04.2' version = '1.0dev' + doi = '' } -// Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' +// Nextflow plugins +plugins { + id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" - return obj - } +validation { + defaultIgnoreParams = ["genomes"] + monochromeLogs = params.monochrome_logs + help { + enabled = true + command = "nextflow run nf-core/radseq -profile --input samplesheet.csv --outdir " + fullParameter = "help_full" + showHiddenParameter = "show_hidden" + beforeText = """ +-\033[2m----------------------------------------------------\033[0m- + \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m +\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m +\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m +\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m + \033[0;32m`._,._,\'\033[0m +\033[0;35m nf-core/radseq ${manifest.version}\033[0m +-\033[2m----------------------------------------------------\033[0m- +""" + afterText = """${manifest.doi ? "\n* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} +* The nf-core framework + https://doi.org/10.1038/s41587-020-0439-x + +* Software dependencies + https://github.com/nf-core/radseq/blob/master/CITATIONS.md +""" + } + summary { + beforeText = validation.help.beforeText + afterText = validation.help.afterText } } diff --git a/nextflow_schema.json b/nextflow_schema.json index 5acf950..ad83eab 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,33 +1,32 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/radseq/master/nextflow_schema.json", "title": "nf-core/radseq pipeline parameters", - "description": "dDocent workflow", + "description": "variant calling pipeline for radseq", "type": "object", - "definitions": { + "$defs": { "input_output_options": { "title": "Input/output options", "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/radseq/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" }, "outdir": { "type": "string", - "description": "Path to the output directory where the results will be saved.", - "default": "./results", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" }, "email": { @@ -59,26 +58,27 @@ "fasta": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. 
Combine with `--save_reference` to save BWA index for future runs.", "fa_icon": "far fa-file-code" }, - "igenomes_base": { - "type": "string", - "format": "directory-path", - "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes", - "fa_icon": "fas fa-cloud-download-alt", - "hidden": true - }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", "fa_icon": "fas fa-ban", "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + }, + "igenomes_base": { + "type": "string", + "format": "directory-path", + "description": "The base path to the igenomes reference files", + "fa_icon": "fas fa-ban", + "hidden": true, + "default": "s3://ngi-igenomes/igenomes/" } } }, @@ -130,41 +130,6 @@ } } }, - "max_job_request_options": { - "title": "Max job request options", - "type": "object", - "fa_icon": "fab fa-acquisitions-incorporated", - "description": "Set the top limit for requested resources for any single job.", - "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", - "properties": { - "max_cpus": { - "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 16, - "fa_icon": "fas fa-microchip", - "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" - }, - "max_memory": { - "type": "string", - "description": "Maximum amount of memory that can be requested for any single job.", - "default": "128.GB", - "fa_icon": "fas fa-memory", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" - }, - "max_time": { - "type": "string", - "description": "Maximum amount of time that can be requested for any single job.", - "default": "240.h", - "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", - "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_time '2.h'`" - } - } - }, "generic_options": { "title": "Generic options", "type": "object", @@ -172,12 +137,21 @@ "description": "Less common options for the pipeline, typically set in a config file.", "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", "properties": { - "help": { + "version": { "type": "boolean", - "description": "Display help text.", + "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", "hidden": true }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, "email_on_fail": { "type": "string", "description": "Email address for completion summary, only when pipeline fails.", @@ -206,19 +180,31 @@ "fa_icon": "fas fa-palette", "hidden": true }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", + "hidden": true + }, "multiqc_config": { "type": "string", + "format": "file-path", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true }, - "tracedir": { + "multiqc_logo": { "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", + "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", "hidden": true }, + "multiqc_methods_description": { + "type": "string", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -226,37 +212,34 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { - "type": "boolean", - "fa_icon": "far fa-eye-slash", - "description": "Show all params when using `--help`", - "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "hidden": true }, - "enable_conda": { - "type": "boolean", - "description": "Run this workflow with Conda. 
You can also use '-profile conda' instead of providing this parameter.", - "hidden": true, - "fa_icon": "fas fa-bacon" + "trace_report_suffix": { + "type": "string", + "fa_icon": "far calendar", + "description": "Suffix to add to the trace report filename. Default is the date and time in the format yyyy-MM-dd_HH-mm-ss.", + "hidden": true } } } }, "allOf": [ { - "$ref": "#/definitions/input_output_options" - }, - { - "$ref": "#/definitions/reference_genome_options" + "$ref": "#/$defs/input_output_options" }, { - "$ref": "#/definitions/institutional_config_options" + "$ref": "#/$defs/reference_genome_options" }, { - "$ref": "#/definitions/max_job_request_options" + "$ref": "#/$defs/institutional_config_options" }, { - "$ref": "#/definitions/generic_options" + "$ref": "#/$defs/generic_options" } ] } diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json new file mode 100644 index 0000000..6c19f14 --- /dev/null +++ b/ro-crate-metadata.json @@ -0,0 +1,295 @@ +{ + "@context": [ + "https://w3id.org/ro/crate/1.1/context", + { + "GithubService": "https://w3id.org/ro/terms/test#GithubService", + "JenkinsService": "https://w3id.org/ro/terms/test#JenkinsService", + "PlanemoEngine": "https://w3id.org/ro/terms/test#PlanemoEngine", + "TestDefinition": "https://w3id.org/ro/terms/test#TestDefinition", + "TestInstance": "https://w3id.org/ro/terms/test#TestInstance", + "TestService": "https://w3id.org/ro/terms/test#TestService", + "TestSuite": "https://w3id.org/ro/terms/test#TestSuite", + "TravisService": "https://w3id.org/ro/terms/test#TravisService", + "definition": "https://w3id.org/ro/terms/test#definition", + "engineVersion": "https://w3id.org/ro/terms/test#engineVersion", + "instance": "https://w3id.org/ro/terms/test#instance", + "resource": "https://w3id.org/ro/terms/test#resource", + "runsOn": "https://w3id.org/ro/terms/test#runsOn" + } + ], + "@graph": [ + { + "@id": "./", + "@type": "Dataset", + "creativeWorkStatus": "InProgress", + "datePublished": "2024-12-12T11:25:20+00:00", + "description": "

<h1>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-radseq_logo_dark.png\">\n    <img alt=\"nf-core/radseq\" src=\"docs/images/nf-core-radseq_logo_light.png\">\n  </picture>\n</h1>

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/radseq/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/radseq/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/radseq/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/radseq/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/radseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/radseq)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23radseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/radseq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/radseq** is a bioinformatics pipeline that ...\n\n\n\n\n\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\n\nNow, you can run the pipeline using:\n\n\n\n```bash\nnextflow run nf-core/radseq \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/radseq/usage) and the [parameter documentation](https://nf-co.re/radseq/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/radseq/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/radseq/output).\n\n## Credits\n\nnf-core/radseq was originally written by Gabriel Barrett.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#radseq` channel](https://nfcore.slack.com/channels/radseq) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "hasPart": [ + { + "@id": "main.nf" + }, + { + "@id": "assets/" + }, + { + "@id": "conf/" + }, + { + "@id": "docs/" + }, + { + "@id": "docs/images/" + }, + { + "@id": "modules/" + }, + { + "@id": "modules/nf-core/" + }, + { + "@id": "workflows/" + }, + { + "@id": "subworkflows/" + }, + { + "@id": "nextflow.config" + }, + { + "@id": "README.md" + }, + { + "@id": "nextflow_schema.json" + }, + { + "@id": "CHANGELOG.md" + }, + { + "@id": "LICENSE" + }, + { + "@id": "CODE_OF_CONDUCT.md" + }, + { + "@id": "CITATIONS.md" + }, + { + "@id": "modules.json" + }, + { + "@id": "docs/usage.md" + }, + { + "@id": "docs/output.md" + }, + { + "@id": ".nf-core.yml" + }, + { + "@id": ".pre-commit-config.yaml" + }, + { + "@id": ".prettierignore" + } + ], + "isBasedOn": "https://github.com/nf-core/radseq", + "license": "MIT", + "mainEntity": { + "@id": "main.nf" + }, + "mentions": [ + { + "@id": "#2ae36dd0-e5b0-4555-9ef5-f75a62ec4496" + } + ], + "name": "nf-core/radseq" + }, + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": [ + { + "@id": "https://w3id.org/ro/crate/1.1" + }, + { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + } + ] + }, + { + "@id": "main.nf", + "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + "dateCreated": "", + "dateModified": "2024-12-12T11:25:20Z", + "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", + "keywords": ["nf-core", "nextflow"], + "license": ["MIT"], + "name": ["nf-core/radseq"], + "programmingLanguage": { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" + }, + "sdPublisher": { + "@id": "https://nf-co.re/" 
+ }, + "url": ["https://github.com/nf-core/radseq", "https://nf-co.re/radseq/dev/"], + "version": ["1.0dev"] + }, + { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", + "@type": "ComputerLanguage", + "identifier": { + "@id": "https://www.nextflow.io/" + }, + "name": "Nextflow", + "url": { + "@id": "https://www.nextflow.io/" + }, + "version": "!>=24.04.2" + }, + { + "@id": "#2ae36dd0-e5b0-4555-9ef5-f75a62ec4496", + "@type": "TestSuite", + "instance": [ + { + "@id": "#4229dfdb-8ba3-48cd-a9aa-b5260e3045f6" + } + ], + "mainEntity": { + "@id": "main.nf" + }, + "name": "Test suite for nf-core/radseq" + }, + { + "@id": "#4229dfdb-8ba3-48cd-a9aa-b5260e3045f6", + "@type": "TestInstance", + "name": "GitHub Actions workflow for testing nf-core/radseq", + "resource": "repos/nf-core/radseq/actions/workflows/ci.yml", + "runsOn": { + "@id": "https://w3id.org/ro/terms/test#GithubService" + }, + "url": "https://api.github.com" + }, + { + "@id": "https://w3id.org/ro/terms/test#GithubService", + "@type": "TestService", + "name": "Github Actions", + "url": { + "@id": "https://github.com" + } + }, + { + "@id": "assets/", + "@type": "Dataset", + "description": "Additional files" + }, + { + "@id": "conf/", + "@type": "Dataset", + "description": "Configuration files" + }, + { + "@id": "docs/", + "@type": "Dataset", + "description": "Markdown files for documenting the pipeline" + }, + { + "@id": "docs/images/", + "@type": "Dataset", + "description": "Images for the documentation files" + }, + { + "@id": "modules/", + "@type": "Dataset", + "description": "Modules used by the pipeline" + }, + { + "@id": "modules/nf-core/", + "@type": "Dataset", + "description": "nf-core modules" + }, + { + "@id": "workflows/", + "@type": "Dataset", + "description": "Main pipeline workflows to be executed in main.nf" + }, + { + "@id": "subworkflows/", + "@type": "Dataset", + "description": "Smaller subworkflows" + }, + { + "@id": "nextflow.config", + "@type": "File", + "description": "Main Nextflow configuration file" + }, + { + "@id": "README.md", + "@type": "File", + "description": "Basic pipeline usage information" + }, + { + "@id": "nextflow_schema.json", + "@type": "File", + "description": "JSON schema for pipeline parameter specification" + }, + { + "@id": "CHANGELOG.md", + "@type": "File", + "description": "Information on changes made to the pipeline" + }, + { + "@id": "LICENSE", + "@type": "File", + "description": "The license - should be MIT" + }, + { + "@id": "CODE_OF_CONDUCT.md", + "@type": "File", + "description": "The nf-core code of conduct" + }, + { + "@id": "CITATIONS.md", + "@type": "File", + "description": "Citations needed when using the pipeline" + }, + { + "@id": "modules.json", + "@type": "File", + "description": "Version information for modules from nf-core/modules" + }, + { + "@id": "docs/usage.md", + "@type": "File", + "description": "Usage documentation" + }, + { + "@id": "docs/output.md", + "@type": "File", + "description": "Output documentation" + }, + { + "@id": ".nf-core.yml", + "@type": "File", + "description": "nf-core configuration file, configuring template features and linting rules" + }, + { + "@id": ".pre-commit-config.yaml", + "@type": "File", + "description": "Configuration file for pre-commit hooks" + }, + { + "@id": ".prettierignore", + "@type": "File", + "description": "Ignore file for prettier" + }, + { + "@id": "https://nf-co.re/", + "@type": "Organization", + "name": "nf-core", + "url": "https://nf-co.re/" + } + ] +} diff --git 
a/subworkflows/local/bam_intervals_bedtools.nf b/subworkflows/local/bam_intervals_bedtools.nf deleted file mode 100644 index 9fd4409..0000000 --- a/subworkflows/local/bam_intervals_bedtools.nf +++ /dev/null @@ -1,74 +0,0 @@ -include { BEDTOOLS_BAMTOBED } from '../../modules/nf-core/bedtools/bamtobed/main.nf' -include { BEDOPS_MERGE_BED } from '../../modules/local/bedops/merge/main.nf' -include { BEDTOOLS_SORT } from '../../modules/nf-core/bedtools/sort/main.nf' -include { BEDTOOLS_COVERAGE } from '../../modules/nf-core/bedtools/coverage/main.nf' -include { BEDTOOLS_MERGE_COV } from '../../modules/nf-core/bedtools/merge/main.nf' -include { CREATE_INTERVALS } from '../../modules/local/create_intervals.nf' -include { BEDTOOLS_MAKEWINDOWS } from '../../modules/nf-core/bedtools/makewindows/main.nf' -include { BEDTOOLS_INTERSECT } from '../../modules/nf-core/bedtools/intersect/main.nf' - -workflow BAM_INTERVALS_BEDTOOLS { - - take: - bam - faidx - read_lengths - coverage_threshold - - main: - ch_versions = Channel.empty() - - // Reduce the number of bam files to be passed in the subworkflow via parameters - // Purpose: Memory constraints for large sample sizes at BEDTOOLS_MERGE_COV - // .randomSample(# to subset to, random seed) random seed is critical to -resume functionality - ch_bam = params.subset_intervals_channel ? bam.randomSample(params.subset_intervals_channel, 234) : bam - - ch_bed = BEDTOOLS_BAMTOBED (ch_bam).bed - ch_versions = ch_versions.mix (BEDTOOLS_BAMTOBED.out.versions) - - ch_bed_to_merge = ch_bed.map { - meta, bed -> - [['id':meta.id.split(/\d+/)[0]], bed ] // split based on number and return the first element and group bed files based on shared id string - } - .groupTuple() - - ch_mbed = BEDOPS_MERGE_BED (ch_bed_to_merge).bed - ch_versions = ch_versions.mix(BEDOPS_MERGE_BED.out.versions) - - ch_sorted_mbed = BEDTOOLS_SORT (ch_mbed, faidx.first()).sorted - ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions) - - // Calculate read coverage across indv. 
samples - cov = BEDTOOLS_COVERAGE (ch_bed.combine(ch_sorted_mbed.map{it[1]}).map{meta,bed,mbed -> [meta,mbed,bed]}, faidx.first()).bed - ch_versions = ch_versions.mix (BEDTOOLS_COVERAGE.out.versions) - - ch_cov_to_merge = cov.map { - meta, bed -> - [['id':meta.id.split(/\d+/)[0]], bed ] - } - .groupTuple() - - // combines overlapping features into a single report - ch_mcov = BEDTOOLS_MERGE_COV (ch_cov_to_merge, faidx.first()).cov - ch_versions = ch_versions.mix (BEDTOOLS_MERGE_COV.out.versions) - - // split into 2 files: high and low then make intervals across the genome based - ch_split_high_coverage = BEDTOOLS_MAKEWINDOWS (ch_mcov, true, read_lengths, coverage_threshold).tab - ch_versions = ch_versions.mix (BEDTOOLS_MAKEWINDOWS.out.versions) - - // Writes overlapping regions into new bed file - ch_intersect = BEDTOOLS_INTERSECT (ch_mcov.join(ch_split_high_coverage), 'bed').intersect - ch_versions = ch_versions.mix (BEDTOOLS_INTERSECT.out.versions) - - ch_createintervals = ch_mcov.join(ch_intersect).join(BEDTOOLS_MAKEWINDOWS.out.low_cov) - - //TODO #2: Convert into Groovy function in nf-core-radseq/lib/WorkflowRadseq.groovy - ch_intervals = CREATE_INTERVALS (ch_createintervals, read_lengths).intervals.transpose() - ch_versions = ch_versions.mix (CREATE_INTERVALS.out.versions) - - emit: - intervals = ch_intervals - - versions = ch_versions - -} \ No newline at end of file diff --git a/subworkflows/local/bam_merge_index_samtools b/subworkflows/local/bam_merge_index_samtools deleted file mode 100644 index c8209b6..0000000 --- a/subworkflows/local/bam_merge_index_samtools +++ /dev/null @@ -1,33 +0,0 @@ -include { SAMTOOLS_INDEX as INDEX_MERGE_BAM } from '../../modules/nf-core/samtools/index/main.nf' -include { SAMTOOLS_MERGE as MERGE_BAM } from '../../modules/nf-core/samtools/merge/main.nf' - -workflow BAM_MERGE_INDEX_SAMTOOLS { - take: - bam // channel: [mandatory] meta, bam - fasta - fai - - main: - ch_versions = Channel.empty() - - ch_bam_to_merge = bam.map { - meta, bed -> - [['id':meta.id.split(/\d+/)[0]], bed ] - } - .groupTuple() - - MERGE_BAM(ch_bam_to_merge, fasta, fai) - - INDEX_MERGE_BAM(MERGE_BAM.out.bam) - - bam_bai = MERGE_BAM.out.bam - .join(INDEX_MERGE_BAM.out.bai) - - // Gather versions of all tools used - ch_versions = ch_versions.mix(INDEX_MERGE_BAM.out.versions.first()) - ch_versions = ch_versions.mix(MERGE_BAM.out.versions.first()) - - emit: - bam_bai = bam_bai - versions = ch_versions -} \ No newline at end of file diff --git a/subworkflows/local/bam_variant_calling_freebayes b/subworkflows/local/bam_variant_calling_freebayes deleted file mode 100644 index cf92bfc..0000000 --- a/subworkflows/local/bam_variant_calling_freebayes +++ /dev/null @@ -1,36 +0,0 @@ -include { FREEBAYES } from '../../modules/nf-core/freebayes/main.nf' -include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort/main.nf' -include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main.nf' -include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/concat/main.nf' - - -workflow BAM_VARIANT_CALLING_FREEBAYES { - - take: - bam_bai_bed // [[meta], bam, bai, bed] - intervals // true or false - fasta // reference [fasta] - fai // reference index [faidx] - - main: - ch_versions = Channel.empty() - - popmap = params.popmap ? 
Channel.fromPath(params.popmap).first() : [] - - FREEBAYES (bam_bai_bed, intervals, fasta.first(), fai.first(), [], popmap, []) - ch_versions = ch_versions.mix(FREEBAYES.out.versions) - - ch_vcfsort = BCFTOOLS_SORT (FREEBAYES.out.vcf).vcf - ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions) - - ch_tbi = TABIX_TABIX (BCFTOOLS_SORT.out.vcf).tbi - ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) - - BCFTOOLS_CONCAT (ch_vcfsort.join (ch_tbi).map{meta,vcf,tbi->[[id:meta.id],vcf,tbi]}.groupTuple(by:0)) - ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions) - - emit: - vcf = BCFTOOLS_CONCAT.out.vcf - - versions = ch_versions -} \ No newline at end of file diff --git a/subworkflows/local/cdhit_rainbow b/subworkflows/local/cdhit_rainbow deleted file mode 100644 index ddf7aa6..0000000 --- a/subworkflows/local/cdhit_rainbow +++ /dev/null @@ -1,85 +0,0 @@ -include { PREPARE_FORWARD_READS } from '../../modules/local/prepare_forward_reads.nf' -include { COMBINE_UNIQUE_READS } from '../../modules/local/combine_uniq_forward_reads.nf' -include { SEQTK_SEQ } from '../../modules/nf-core/seqtk/seq/main.nf' -include { FASTP as DENOVO_FASTP } from '../../modules/nf-core/fastp/main.nf' -include { CDHIT } from '../../modules/nf-core/cdhit/cdhit/main.nf' // cd-hit -include { CDHIT_TO_RBDIV } from '../../modules/local/cdhit_to_rbdiv.nf' -include { RAINBOW_DIV } from '../../modules/local/rainbow/div/rainbow_div.nf' // rainbow div -include { RAINBOW_MERGE } from '../../modules/local/rainbow/merge/rainbow_merge.nf' // rainbow merge -include { RBMERGE2FASTA as WRITE_FASTA } from '../../modules/local/rbmerge2fasta.nf' // write fasta - - /* Collect individual uniq reads for COMBINING into one Fasta - * Combine reads and experiment with thresholds: - * WithinIndividualRead_MinimumDepth - * BetweenIndividualRead_MinimumDepth - * uniq sequences -> FASTA format using seqtk seq - * TRIM reads with large adapter content using fastp - * Cluster reads using cd-hit est - */ - -workflow CDHIT_RAINBOW { - take: - reads // [[:], [1.fq.gz, 2.fq.gz]] - sequence_type // value exe. 
'PE' or 'ROP' - - main: - ch_versions = Channel.empty() - - // deduplicate forward reads - ch_uniq_forwardreads = PREPARE_FORWARD_READS (reads, sequence_type).indv_uniq_seqs - ch_versions = ch_versions.mix(PREPARE_FORWARD_READS.out.versions) - - ch_combine_unique_reads = ch_uniq_forwardreads - .map { - meta, fasta -> - def metaf = [:] // initialize groovy map - metaf.id = meta.id.split(/\d+/)[0] // set id splits at the first number appearance and retains items to the left - metaf.single_end = meta.single_end - - [metaf, fasta] - } - .groupTuple() - - - def minReadDepth_WithinIndividual = params.minreaddepth_withinindividual ?: [2] - def minReadDepth_BetweenIndividual = params.minreaddepth_betweenindividual ?: [2] - - // Combine forward reads across individuals - ch_uniq_full_fasta = COMBINE_UNIQUE_READS (ch_combine_unique_reads, sequence_type, Channel.fromList(minReadDepth_WithinIndividual), Channel.fromList(minReadDepth_BetweenIndividual)).uniq_reads - ch_versions = ch_versions.mix(COMBINE_UNIQUE_READS.out.versions) - - // write dummy quality scores for fastp - ch_uniq_seqtk_fq = SEQTK_SEQ (ch_uniq_full_fasta).fastx - ch_versions = ch_versions.mix(SEQTK_SEQ.out.versions) - - // trim adapter content: last true statement activates code block tailored to denovo pipeline since this was based off a nf-core module - ch_trimadapters_uniq_fasta = DENOVO_FASTP (ch_uniq_seqtk_fq, ch_uniq_full_fasta, false, false, true).fasta - ch_versions = ch_versions.mix(DENOVO_FASTP.out.versions) - - // cluster - ch_cdhit_cluster = CDHIT (ch_trimadapters_uniq_fasta, DENOVO_FASTP.out.totaluniqseq, sequence_type).cdhit_cluster - ch_versions = ch_versions.mix(CDHIT.out.versions) - - if (params.sequence_type == 'PE' || params.sequence_type == 'SE') {totaluniqseq = DENOVO_FASTP.out.totaluniqseq} else {totaluniqseq = CDHIT.out.forward_uniq} - - // cd-hit cluster to rainbow cluster format - ch_rbcluster = CDHIT_TO_RBDIV (ch_cdhit_cluster, totaluniqseq, sequence_type).rbcluster - ch_versions = ch_versions.mix(CDHIT_TO_RBDIV.out.versions) - - // div - ch_rbdiv = RAINBOW_DIV (ch_rbcluster).rbdiv - ch_versions = ch_versions.mix(RAINBOW_DIV.out.versions) // contains awk code not provided in the versions.yml file TODO: awk BusyBox trouble w/ extracting version number within container - - // merge - ch_rbmerge = RAINBOW_MERGE (ch_rbdiv, sequence_type, true).rbmerge - ch_versions = ch_versions.mix(RAINBOW_MERGE.out.versions) - - // output fasta - ch_fasta = WRITE_FASTA (ch_rbdiv, ch_rbmerge).fasta - ch_versions = ch_versions.mix(WRITE_FASTA.out.versions) - - emit: - fasta = ch_fasta - - versions = ch_versions -} \ No newline at end of file diff --git a/subworkflows/local/fastp_processradtags b/subworkflows/local/fastp_processradtags deleted file mode 100644 index a05d778..0000000 --- a/subworkflows/local/fastp_processradtags +++ /dev/null @@ -1,37 +0,0 @@ -include { FASTP } from '../../modules/nf-core/fastp/main.nf' - -workflow PROCESS_RAD { - take: - reads - - main: - ch_versions = Channel.empty() - - // get the first read from fastq files to calculate read length from - reads - .map { - meta, reads -> - if (meta.single_end) { - return [meta.id,reads[0]] - } else { - return [meta.id,reads[1]] - } - } - .splitFastq(record:true,limit:1) - .map { meta, splitfastq -> - return [splitfastq.readString] - } - .set {read_lengths} - - ch_fastp = FASTP (reads, [[],[]], true, false, false).reads // [[meta],[reads]], save_trimmed_failed, save_merged - ch_versions = ch_versions.mix(FASTP.out.versions) - - emit: - trimmed_reads = ch_fastp - 
read_lengths - fastp_json = FASTP.out.json - - versions = ch_versions - -} - diff --git a/subworkflows/local/fastq_index_align_bwa_minimap b/subworkflows/local/fastq_index_align_bwa_minimap deleted file mode 100644 index e99a49d..0000000 --- a/subworkflows/local/fastq_index_align_bwa_minimap +++ /dev/null @@ -1,104 +0,0 @@ -// -// Align reads w/ BWA or MINIMAP -// - -include { BWA_INDEX } from '../../modules/nf-core/bwa/index/main.nf' -include { BWA_MEM } from '../../modules/nf-core/bwa/mem/main.nf' -include { BWAMEM2_INDEX } from '../../modules/nf-core/bwamem2/index/main.nf' -include { BWAMEM2_MEM } from '../../modules/nf-core/bwamem2/mem/main.nf' -include { MINIMAP2_INDEX } from '../../modules/nf-core/minimap2/index/main.nf' -include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main.nf' -include { UMITOOLS_DEDUP } from '../../modules/nf-core/umitools/dedup/main.nf' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_UMI } from '../../modules/nf-core/samtools/index/main.nf' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_BAM } from '../../modules/nf-core/samtools/index/main.nf' -include { BAM_STATS_SAMTOOLS } from '../nf-core/bam_stats_samtools/main.nf' -include { BAM_MERGE_INDEX_SAMTOOLS } from '../local/bam_merge_index_samtools' - - -workflow FASTQ_INDEX_ALIGN_BWA_MINIMAP { - - take: - reads - fasta - fai - sequence_type - read_lengths - - main: - ch_versions = Channel.empty() - - switch ( params.aligner ) { - - case 'bwamem': - - BWA_INDEX (fasta) - ch_versions = ch_versions.mix(BWA_INDEX.out.versions) - - // add specific arguments - bam = BWA_MEM (reads, BWA_INDEX.out.index.first(), true, sequence_type, read_lengths.collect()).bam - ch_versions = ch_versions.mix(BWA_MEM.out.versions) - break - - /*case 'minimap2': - - //MINIMAP_INDEX (fasta) - //ch_versions = ch_versions.mix(MINIMAP_INDEX.out.versions) - - bam = MINIMAP2_ALIGN (reads, fasta.map{it[1]}.first(), true, false, false).bam - ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions) - break*/ - - case 'bwamem2': - - BWAMEM2_INDEX (fasta) - ch_versions = ch_versions.mix(BWAMEM2_INDEX.out.versions) - - // add specific arguments - bam = BWAMEM2_MEM (reads, BWAMEM2_INDEX.out.index.first(), true, sequence_type, read_lengths.collect()).bam - ch_versions = ch_versions.mix(BWAMEM2_MEM.out.versions) - break - - default: - - exit 1, "unsupported aligner: ${aligner} \n supported options:" + params.aligner_options - - } - - // split up into 2 channels to decide input for deduplication - bam_to_umi = bam - .branch { meta, bam -> - umi: meta.umi_barcodes == true - no_umi: meta.umi_barcodes == false - } - // need indices with bam files for umitools dedup - bam_bai_to_umi = SAMTOOLS_INDEX_UMI(bam_to_umi.umi).bai.join(bam_to_umi.umi).map{meta, bai, bam -> [meta, bam, bai]} - ch_versions = ch_versions.mix(SAMTOOLS_INDEX_UMI.out.versions) - - // query only reads that satisfied the above condition - umi_bam = UMITOOLS_DEDUP (bam_bai_to_umi, true).bam - ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions) - - bai = SAMTOOLS_INDEX_BAM (bam_to_umi.no_umi.mix(umi_bam)).bai - ch_versions = ch_versions.mix(SAMTOOLS_INDEX_BAM.out.versions) - - // add deduplicated umi reads to no_umi channnel and join the index's - bam_bai = bam_to_umi.no_umi - .mix(umi_bam) - .join(bai) - - // get statistics - BAM_STATS_SAMTOOLS (bam_bai, fasta.map{it[1]}.first()) - ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) - - mbam_bai = BAM_MERGE_INDEX_SAMTOOLS (bam_bai.map{meta,bam,bai -> [meta, bam]}, fasta.map{it[1]}.first(), 
fai.map{it[1]}.first()).bam_bai - ch_versions = ch_versions.mix(BAM_MERGE_INDEX_SAMTOOLS.out.versions) - - emit: - bam_bai - mbam_bai - stats = BAM_STATS_SAMTOOLS.out.stats - flagstat = BAM_STATS_SAMTOOLS.out.flagstat - idxstats = BAM_STATS_SAMTOOLS.out.idxstats - - versions = ch_versions -} \ No newline at end of file diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index d294f04..0000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,43 +0,0 @@ -// -// Check input samplesheet and get read channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.csv - - main: - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channels(it) } - .set { reads } - - emit: - reads // channel: [ val(meta), [ reads ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] -} - -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channels(LinkedHashMap row) { - def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() - meta.umi_barcodes = row.umi_barcodes.toBoolean() - - def array = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - if (meta.single_end) { - array = [ meta, [ file(row.fastq_1) ] ] - } else { - if (!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" - } - array = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] - } - return array -} diff --git a/subworkflows/local/utils_nfcore_radseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_radseq_pipeline/main.nf new file mode 100644 index 0000000..d59533a --- /dev/null +++ b/subworkflows/local/utils_nfcore_radseq_pipeline/main.nf @@ -0,0 +1,264 @@ +// +// Subworkflow with functionality specific to the nf-core/radseq pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { samplesheetToList } from 'plugin/nf-schema' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW TO INITIALISE PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + + 
main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + UTILS_NFSCHEMA_PLUGIN ( + workflow, + validate_params, + null + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + // + // Create channel from input file provided through params.input + // + + Channel + .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + .map { + meta, fastq_1, fastq_2 -> + if (!fastq_2) { + return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] + } else { + return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + } + } + .groupTuple() + .map { samplesheet -> + validateInputSamplesheet(samplesheet) + } + .map { + meta, fastqs -> + return [ meta, fastqs.flatten() ] + } + .set { ch_samplesheet } + + emit: + samplesheet = ch_samplesheet + versions = ch_versions +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW FOR PIPELINE COMPLETION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + def multiqc_reports = multiqc_report.toList() + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail( + summary_params, + email, + email_on_fail, + plaintext_email, + outdir, + monochrome_logs, + multiqc_reports.getVal(), + ) + } + + completionSummary(monochrome_logs) + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + genomeExistsError() +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastqs ] +} +// +// Get attribute from genome config file e.g. 
fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", + "
<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>" + ].join(' ').trim() + + return reference_text + } + + def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so can be used as with familiar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + def manifest_doi = meta.manifest_map.doi.tokenize(",") + manifest_doi.each { doi_ref -> + temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + } + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = "" + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} + diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf deleted file mode 100644 index 9d4294f..0000000 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf +++ /dev/null @@ -1,56 +0,0 @@ -// -// UMI-tools dedup, index BAM file and run samtools stats, flagstat and idxstats -// - -include { UMITOOLS_DEDUP } from '../../../modules/nf-core/umitools/dedup/main' -include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' -include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' - -workflow BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS { - take: - bam_bai // channel: [ val(meta), [ bam ], [ bai/csi ] ] - get_dedup_stats // boolean: true/false - - main: - - ch_versions = Channel.empty() - - // - // UMI-tools dedup - // - UMITOOLS_DEDUP ( bam_bai, get_dedup_stats ) - ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions.first()) - - // - // Index BAM file and run samtools stats, flagstat and idxstats - // - SAMTOOLS_INDEX ( UMITOOLS_DEDUP.out.bam ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - - UMITOOLS_DEDUP.out.bam - .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) - .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) - .map { - meta, bam, bai, csi -> - if (bai) { - [ meta, bam, bai ] - } else { - [ meta, bam, csi ] - } - } - .set { ch_bam_bai } - - BAM_STATS_SAMTOOLS ( ch_bam_bai, [] ) - ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) - - emit: - bam = UMITOOLS_DEDUP.out.bam // channel: [ val(meta), [ bam ] ] - - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/meta.yml b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/meta.yml deleted file mode 100644 index a3b2947..0000000 --- a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/meta.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: "bam_dedup_stats_samtools_umitools" -description: UMI-tools dedup, index BAM file and run samtools stats, flagstat and idxstats -keywords: - - umi - - dedup - - index - - bam - - sam - - cram -modules: - - umitools/dedup - - samtools/index - - samtools/stats - - samtools/idxstats - - samtools/flagstat -input: - - bam_bai: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - get_dedup_stats: - type: boolean - description: | - Generate output stats when running "umi_tools dedup" -output: - - bam: - type: file - description: Umi deduplicated BAM/CRAM/SAM file - pattern: 
"*.{bam,cram,sam}" - - bai: - type: file - description: Umi deduplicated BAM/CRAM/SAM samtools index - pattern: "*.{bai,crai,sai}" - - csi: - type: file - description: CSI samtools index - pattern: "*.csi" - - stats: - type: file - description: File containing samtools stats output - pattern: "*.{stats}" - - flagstat: - type: file - description: File containing samtools flagstat output - pattern: "*.{flagstat}" - - idxstats: - type: file - description: File containing samtools idxstats output - pattern: "*.{idxstats}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@KamilMaliszArdigen" diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf deleted file mode 100644 index cfcc48d..0000000 --- a/subworkflows/nf-core/bam_stats_samtools/main.nf +++ /dev/null @@ -1,32 +0,0 @@ -// -// Run SAMtools stats, flagstat and idxstats -// - -include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' -include { SAMTOOLS_IDXSTATS } from '../../../modules/nf-core/samtools/idxstats/main' -include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' - -workflow BAM_STATS_SAMTOOLS { - take: - bam_bai // channel: [ val(meta), [ bam/cram ], [bai/csi] ] - fasta // channel: [ fasta ] - - main: - ch_versions = Channel.empty() - - SAMTOOLS_STATS ( bam_bai, fasta ) - ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) - - SAMTOOLS_FLAGSTAT ( bam_bai ) - ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) - - SAMTOOLS_IDXSTATS ( bam_bai ) - ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) - - emit: - stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/bam_stats_samtools/meta.yml b/subworkflows/nf-core/bam_stats_samtools/meta.yml deleted file mode 100644 index 5252b0e..0000000 --- a/subworkflows/nf-core/bam_stats_samtools/meta.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: bam_stats_samtools -description: Produces comprehensive statistics from SAM/BAM/CRAM file -keywords: - - statistics - - counts - - bam - - sam - - cram -modules: - - samtools/stats - - samtools/idxstats - - samtools/flagstat -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - bai: - type: file - description: Index for BAM/CRAM/SAM file - pattern: "*.{bai,crai,sai}" - - fasta: - type: file - description: Reference genome fasta file - pattern: "*.{fasta,fa}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - stats: - type: file - description: File containing samtools stats output - pattern: "*.{stats}" - - flagstat: - type: file - description: File containing samtools flagstat output - pattern: "*.{flagstat}" - - idxstats: - type: file - description: File containing samtools idxstats output - pattern: "*.{idxstats}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 0000000..d6e593e --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info("${workflow.manifest.name} ${getWorkflowVersion()}") + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = groovy.json.JsonOutput.toJson(params) + temp_pf.text = groovy.json.JsonOutput.prettyPrint(jsonStr) + + nextflow.extension.FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + def parser = new org.yaml.snakeyaml.Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } + catch (NullPointerException e) { + log.debug(e) + log.warn("Could not verify conda channel configuration.") + return null + } + catch (IOException e) { + log.debug(e) + log.warn("Could not verify conda channel configuration.") + return null + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = required_channels_in_order != channels.findAll { ch -> ch in required_channels_in_order } + + if (channels_missing | channel_priority_violation) { + log.warn """\ + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + There is a problem with your Conda configuration! + You will need to set-up the conda-forge and bioconda channels correctly. + Please refer to https://bioconda.github.io/ + The observed channel order is + ${channels} + but the following channel order is required: + ${required_channels_in_order} + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + """.stripIndent(true) + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 0000000..e5c3a0a --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct.
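+# Illustrative call wiring these inputs, in take: order (example values, not part of the upstream file): +# UTILS_NEXTFLOW_PIPELINE ( params.version, true, params.outdir, workflow.profile.contains('conda') )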
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 0000000..68718e4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000..e3f0baf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000..02dbf09 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,113 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + expect { + with(workflow) { + assert success + assert 
"nextflow_workflow v9.9.9" in stdout + } + } + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 0000000..a09572e --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 0000000..f847611 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 0000000..bfd2587 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,419 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NFCORE_PIPELINE { + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + def valid_config = true as Boolean + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn( + "[${workflow.manifest.name}] You are attempting to run the pipeline without any custom configuration!\n\n" + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + "Please refer to the quick start section and usage docs for the pipeline.\n " + ) + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error( + "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } + if (nextflow_cli_args[0]) { + log.warn( + "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + def yaml = new org.yaml.snakeyaml.Yaml() + def versions = yaml.load(yaml_file).collectEntries { k, v -> [k.tokenize(':')[-1], v] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + ${workflow.manifest.name}: ${getWorkflowVersion()} + Nextflow: ${workflow.nextflow.version} + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + summary_params + .keySet() + .each { group -> + def group_params = summary_params.get(group) + // This gets the parameters of that particular group + if (group_params) { + summary_section += "
    <p style=\"font-size:110%\"><b>${group}</b></p>\n" + summary_section += "    <dl class=\"dl-horizontal\">\n" + group_params + .keySet() + .sort() + .each { param -> + summary_section += "        <dt>${param}</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>\n" + } + summary_section += "    </dl>\n" + } + } + + def yaml_file_text = "id: '${workflow.manifest.name.replace('/', '-')}-summary'\n" as String + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// ANSI colours used for terminal logging +// +def logColours(monochrome_logs=true) { + def colorcodes = [:] as Map + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ?
'' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// Return a single report from an object that may be a Path or List +// +def getSingleReport(multiqc_reports) { + if (multiqc_reports instanceof Path) { + return multiqc_reports + } else if (multiqc_reports instanceof List) { + if (multiqc_reports.size() == 0) { + log.warn("[${workflow.manifest.name}] No reports found from process 'MULTIQC'") + return null + } else if (multiqc_reports.size() == 1) { + return multiqc_reports.first() + } else { + log.warn("[${workflow.manifest.name}] Found multiple reports from process 'MULTIQC', will use only one") + return multiqc_reports.first() + } + } else { + return null + } +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[${workflow.manifest.name}] Successful: ${workflow.runName}" + if (!workflow.success) { + subject = "[${workflow.manifest.name}] FAILED: ${workflow.runName}" + } + + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) { + misc_fields['Pipeline repository Git URL'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['Pipeline repository Git Commit'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['Pipeline Git branch/tag'] = workflow.revision + } + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = getSingleReport(multiqc_report) + + // Check if we are only sending emails on failure + def email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as MemoryUnit + def smail_fields = [email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + def colors = logColours(monochrome_logs) as Map + if (email_address) { + try { + if (plaintext_email) { + throw new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') + } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + ['sendmail', '-t'].execute() << sendmail_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (sendmail)-") + } + catch (Exception msg) { + log.debug(msg.toString()) + log.debug("Trying with mail instead of sendmail") + // Catch failures and try with plaintext + def mail_cmd = ['mail', '-s', subject, '--content-type=text/html', email_address] + mail_cmd.execute() << email_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (mail)-") + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + nextflow.extension.FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html") + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + nextflow.extension.FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt") + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Pipeline completed successfully${colors.reset}-") + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-") + } + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.red} Pipeline completed with errors${colors.reset}-") + } +} + +// +// Construct and send a notification to a web server as JSON e.g.
Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) { + misc_fields['repository'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['commitid'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['revision'] = workflow.revision + } + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection() + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")) + def postRC = post.getResponseCode() + if (!postRC.equals(200)) { + log.warn(post.getErrorStream().getText()) + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 0000000..d08d243 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 0000000..f117040 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,126 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config 
"subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function getSingleReport with a single file") { + function "getSingleReport" + + when { + function { + """ + input[0] = file(params.modules_testdata_base_path + '/generic/tsv/test.tsv', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert function.result.contains("test.tsv") } + ) + } + } + + test("Test Function getSingleReport with multiple files") { + function "getSingleReport" + + when { + function { + """ + input[0] = [ + file(params.modules_testdata_base_path + '/generic/tsv/test.tsv', checkIfExists: true), + file(params.modules_testdata_base_path + '/generic/tsv/network.tsv', checkIfExists: true), + file(params.modules_testdata_base_path + '/generic/tsv/expression.tsv', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert function.result.contains("test.tsv") }, + { assert !function.result.contains("network.tsv") }, + { assert !function.result.contains("expression.tsv") } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000..02c6701 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,136 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", 
+ "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": "\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000..8940d32 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 0000000..859d103 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 0000000..d0a926b --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + 
nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 0000000..ac8523c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf new file mode 100644 index 0000000..4994303 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/main.nf @@ -0,0 +1,46 @@ +// +// Subworkflow that uses the nf-schema plugin to validate parameters and render the parameter summary +// + +include { paramsSummaryLog } from 'plugin/nf-schema' +include { validateParameters } from 'plugin/nf-schema' + +workflow UTILS_NFSCHEMA_PLUGIN { + + take: + input_workflow // workflow: the workflow object used by nf-schema to get metadata from the workflow + validate_params // boolean: validate the parameters + parameters_schema // string: path to the parameters JSON schema. + // this has to be the same as the schema given to `validation.parametersSchema` + // when this input is empty it will automatically use the configured schema or + // "${projectDir}/nextflow_schema.json" as default. This input should not be empty + // for meta pipelines + + main: + + // + // Print parameter summary to stdout. This will display the parameters + // that differ from the default given in the JSON schema + // + if(parameters_schema) { + log.info paramsSummaryLog(input_workflow, parameters_schema:parameters_schema) + } else { + log.info paramsSummaryLog(input_workflow) + } + + // + // Validate the parameters using nextflow_schema.json or the schema + // given via the validation.parametersSchema configuration option + // + if(validate_params) { + if(parameters_schema) { + validateParameters(parameters_schema:parameters_schema) + } else { + validateParameters() + } + } + + emit: + dummy_emit = true +} + diff --git a/subworkflows/nf-core/utils_nfschema_plugin/meta.yml b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml new file mode 100644 index 0000000..f7d9f02 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml @@ -0,0 +1,35 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "utils_nfschema_plugin" +description: Run nf-schema to validate parameters and create a summary of changed parameters +keywords: + - validation + - JSON schema + - plugin + - parameters + - summary +components: [] +input: + - input_workflow: + type: object + description: | + The workflow object of the used pipeline. + This object contains meta data used to create the params summary log + - validate_params: + type: boolean + description: Validate the parameters and error if invalid. + - parameters_schema: + type: string + description: | + Path to the parameters JSON schema. + This has to be the same as the schema given to the `validation.parametersSchema` config + option. When this input is empty it will automatically use the configured schema or + "${projectDir}/nextflow_schema.json" as default. The schema should not be given in this way + for meta pipelines. 
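+# Illustrative call (example values): UTILS_NFSCHEMA_PLUGIN ( workflow, params.validate_params, "" ); an empty schema string falls back to the configured or default nextflow_schema.json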
+output: + - dummy_emit: + type: boolean + description: Dummy emit to make nf-core subworkflows lint happy +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test new file mode 100644 index 0000000..8fb3016 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test @@ -0,0 +1,117 @@ +nextflow_workflow { + + name "Test Subworkflow UTILS_NFSCHEMA_PLUGIN" + script "../main.nf" + workflow "UTILS_NFSCHEMA_PLUGIN" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/utils_nfschema_plugin" + tag "plugin/nf-schema" + + config "./nextflow.config" + + test("Should run nothing") { + + when { + + params { + test_data = '' + } + + workflow { + """ + validate_params = false + input[0] = workflow + input[1] = validate_params + input[2] = "" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should validate params") { + + when { + + params { + test_data = '' + outdir = null + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = "" + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } } + ) + } + } + + test("Should run nothing - custom schema") { + + when { + + params { + test_data = '' + } + + workflow { + """ + validate_params = false + input[0] = workflow + input[1] = validate_params + input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should validate params - custom schema") { + + when { + + params { + test_data = '' + outdir = null + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config new file mode 100644 index 0000000..0907ac5 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config @@ -0,0 +1,8 @@ +plugins { + id "nf-schema@2.1.0" +} + +validation { + parametersSchema = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + monochromeLogs = true +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json new file mode 100644 index 0000000..331e0d2 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". 
pipeline parameters", + "description": "", + "type": "object", + "$defs": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/input_output_options" + }, + { + "$ref": "#/$defs/generic_options" + } + ] +} diff --git a/tower.yml b/tower.yml new file mode 100644 index 0000000..787aedf --- /dev/null +++ b/tower.yml @@ -0,0 +1,5 @@ +reports: + multiqc_report.html: + display: "MultiQC HTML report" + samplesheet.csv: + display: "Auto-created samplesheet with collated metadata and FASTQ paths" diff --git a/workflows/radseq.nf b/workflows/radseq.nf index e4ab1a5..d89ca79 100644 --- a/workflows/radseq.nf +++ b/workflows/radseq.nf @@ -1,212 +1,97 @@ /* -======================================================================================== - VALIDATE INPUTS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) - -// Validate input parameters -WorkflowRadseq.initialise(params, log) - -// Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config , params.genome, params.popmap] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } - -/* -======================================================================================== - CONFIG FILES -======================================================================================== -*/ - -ch_multiqc_config = file("$projectDir/assets/multiqc_config.yaml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath(params.multiqc_config) : Channel.empty() - -/* -======================================================================================== - IMPORT LOCAL MODULES/SUBWORKFLOWS -======================================================================================== -*/ - -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { PROCESS_RAD } from '../subworkflows/local/fastp_processradtags' -include { CDHIT_RAINBOW as DENOVO } from '../subworkflows/local/cdhit_rainbow' -include { FASTQ_INDEX_ALIGN_BWA_MINIMAP as ALIGN } from '../subworkflows/local/fastq_index_align_bwa_minimap' -include { BAM_INTERVALS_BEDTOOLS } from '../subworkflows/local/bam_intervals_bedtools' -include { BAM_VARIANT_CALLING_FREEBAYES } from '../subworkflows/local/bam_variant_calling_freebayes' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_radseq_pipeline' /* -======================================================================================== - IMPORT NF-CORE MODULES/SUBWORKFLOWS -======================================================================================== -*/ - -// -// MODULE: Installed directly from nf-core/modules -// -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' -include { SAMTOOLS_FAIDX } from '../modules/nf-core/samtools/faidx/main.nf' - -/* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow RADSEQ { - ch_versions = Channel.empty() - - // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files - // - INPUT_CHECK ( - ch_input - ) - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - - // - // SUBWORKFLOW: remove/trim low quality reads, trim umi's - // - PROCESS_RAD ( - INPUT_CHECK.out.reads - ) - ch_versions = ch_versions.mix(PROCESS_RAD.out.versions) - - // assign fasta channel based on method in config file - switch ( params.method ) { - // assign ch_reference (input for aligning subworkflow) to the reference in the params - case 'reference': - ch_reference = Channel.fromPath(params.genome) - .map{genome -> tuple (genome.simpleName, genome)} - break - case 'denovo': - /* SUBWORKFLOW: Cluster READS after applying unique read thresholds within and among samples. - * option to provide a list of minimum depth thresholds. 
See nextflow.config for more details*/ - ch_reference = DENOVO ( - INPUT_CHECK.out.reads, - params.sequence_type // sequence type exe.: 'SE', 'PE', '' - ).fasta - ch_versions = ch_versions.mix(DENOVO.out.versions) - break - default: - exit 1, "unknown method: ${method} \n supported options:" + params.method_options - } - - // nf-core module index reference for bedtools + freebayes - ch_faidx = SAMTOOLS_FAIDX ( - ch_reference - ).fai - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) - - // - // SUBWORKFLOW: generate fasta indexes, align input files, dedup reads, index bam, calculate statistics - // if denovo and paired then pass length_stats to bwa mem - ch_bam_bai = ALIGN ( - PROCESS_RAD.out.trimmed_reads, - ch_reference, - ch_faidx, - params.sequence_type, - PROCESS_RAD.out.read_lengths - ).bam_bai - ch_versions = ch_versions.mix(ALIGN.out.versions) - - // - // SUBWORKFLOW: freebayes multithreading based on read coverage - // - ch_intervals = BAM_INTERVALS_BEDTOOLS ( - ch_bam_bai.map{meta, bam, bai -> [meta, bam]}, - ch_faidx.map{it[1]}, - PROCESS_RAD.out.read_lengths, - params.max_read_coverage_to_split - ).intervals - ch_versions = ch_versions.mix(BAM_INTERVALS_BEDTOOLS.out.versions) + take: + ch_samplesheet // channel: samplesheet read in from --input + main: - ch_bam_bai_bed = ALIGN.out.mbam_bai - .combine(ch_intervals.map{it[1]}) - .map { meta, bam, bai, bed -> - [[ - id: meta.id, - interval: bed.getName().tokenize( '.' )[1] - ], - bam, bai, bed] - } - // - // SUBWORKFLOW: freebayes parallel variant calling - // - vcf = BAM_VARIANT_CALLING_FREEBAYES ( - ch_bam_bai_bed, - true, - ch_reference.map{it[1]}, - ch_faidx.map{it[1]} - ).vcf - ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_FREEBAYES.out.versions) - + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() // // MODULE: Run FastQC // FASTQC ( - INPUT_CHECK.out.reads + ch_samplesheet ) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) + // + // Collate and save software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_' + 'radseq_software_' + 'mqc_' + 'versions.yml', + sort: true, + newLine: true + ).set { ch_collated_versions } + // // MODULE: MultiQC // - workflow_summary = WorkflowRadseq.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(PROCESS_RAD.out.fastp_json.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ALIGN.out.stats.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ALIGN.out.flagstat.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ALIGN.out.idxstats.collect{it[1]}.ifEmpty([])) + ch_multiqc_config = Channel.fromPath( + "$projectDir/assets/multiqc_config.yml", checkIfExists: true) + 
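// The optional MultiQC config and logo channels below are passed to MULTIQC via .toList() so an empty channel still yields a single (possibly empty) list and the process is not skipped +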
ch_multiqc_custom_config = params.multiqc_config ? + Channel.fromPath(params.multiqc_config, checkIfExists: true) : + Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? + Channel.fromPath(params.multiqc_logo, checkIfExists: true) : + Channel.empty() + + summary_params = paramsSummaryMap( + workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_files = ch_multiqc_files.mix( + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? + file(params.multiqc_methods_description, checkIfExists: true) : + file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix( + ch_methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: true + ) + ) + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList(), + [], + [] + ) - multiqc_report = MULTIQC.out.report.toList() - ch_versions = ch_versions.mix(MULTIQC.out.versions) -} -/* -======================================================================================== - COMPLETION EMAIL AND SUMMARY -======================================================================================== -*/ + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.summary(workflow, params, log) } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */