diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e8b164..1a2ffd6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#54](https://github.com/nf-core/createpanelrefs/pull/54) - Update nft-utils to 0.0.4 - [#55](https://github.com/nf-core/createpanelrefs/pull/55) - Prepare relase 1.0.0 - [#63](https://github.com/nf-core/createpanelrefs/pull/63) - Template update for nf-core/tools v3.5.0dev +- [#66](https://github.com/nf-core/createpanelrefs/pull/66) - Update `GENS` to allow for creating a long-read PON ### `Fixed` @@ -56,6 +57,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n | gatk4 | | 4.6.2.0 | | gawk | | 5.3.0 | | htslib | | 1.22.1 | +| mosdepth | | 0.3.11 | | multiqc | | 1.32 | | samtools | | 1.22.1 | diff --git a/conf/modules/gens_pon.config b/conf/modules/gens_pon.config index e0d99c8..d7e7202 100644 --- a/conf/modules/gens_pon.config +++ b/conf/modules/gens_pon.config @@ -17,22 +17,70 @@ process { ] } + withName: '.*GENS_PON:CAT_CAT' { + ext.prefix = { "${meta.id}_concat" } + } + withName: '.*GENS_PON:GATK4_COLLECTREADCOUNTS' { - ext.args = {"--format ${params.gens_readcount_format} --imr OVERLAPPING_ONLY"} + ext.args = { "--format ${params.gens_readcount_format} --imr OVERLAPPING_ONLY" } publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/gens_pon/readcounts" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode, + path: { "${params.outdir}/gens_pon/readcounts" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } + + withName: '.*GENS_PON:INTERVAL_LIST_TO_BED' { + ext.prefix = { "gens_coverage_bins" } + ext.suffix = "bed" + ext.args2 = '\'BEGIN { FS=OFS="\t" } $2 < $3 { print $1, $2, $3 }\'' + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/references/intervals/gens_pon/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } withName: '.*GENS_PON:GATK4_CREATEREADCOUNTPANELOFNORMALS' { - ext.args = { ["--minimum-interval-median-percentile ${params.gens_min_interval_median_percentile}", - "--maximum-chunk-size ${params.gens_maximum_chunk_size}"].join(" ")} + ext.args = { + [ + "--minimum-interval-median-percentile ${params.gens_min_interval_median_percentile}", + "--maximum-chunk-size ${params.gens_maximum_chunk_size}", + ].join(" ") + } publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/gens_pon/createreadcountpanelofnormals" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode, + path: { "${params.outdir}/gens_pon/createreadcountpanelofnormals" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, ] } + + withName: '.*GENS_PON:MOSDEPTH' { + ext.args = { + [ + '--no-per-base', + '--fast-mode', + ].join(' ') + } + } + + withName: '.*GENS_PON:MOSDEPTH_GATK_FORMAT' { + ext.prefix = { "${meta.id}_gatk_formatted" } + ext.suffix = "gatk_format.tsv" + ext.args = { "-v sample=${meta.id}" } + ext.args2 = '\'BEGIN { OFS="\\t" } { $4 = int($4 + 0.5); $2++; print $1, $2, $3, $4 }\'' + } + + withName: '.*GENS_PON:MOSDEPTH_GATK_HEADER' { + // This requires meta.id to be the same as the sample name in the BAM/CRAM header + ext.args = { "-v sample=${meta.id}" } + ext.args2 = '\'{ print } END { print "@RG\\tID:GATKCopyNumber\\tSM:" sample; print "CONTIG\\tSTART\\tEND\\tCOUNT" }\'' + ext.prefix = { "${meta.id}" } + ext.suffix = 'mosdepth_gatk_header.tsv' + } + + withName: '.*GENS_PON:SAMTOOLS_VIEW' { + ext.args = '-H --output-fmt sam' + ext.prefix = { "${meta.id}.mosdepth_gatk_header" } + } } diff --git a/docs/output.md b/docs/output.md index 3480956..e2639a1 100644 --- a/docs/output.md +++ b/docs/output.md @@ -88,11 +88,14 @@ The reference file contains coverage information normalized across the cohort an - `{pon_name}.hdf5`: Final panel of normals file in HDF5 format. - `references/intervals/gens_pon/` - `*.interval_list`: Interval list file used for read count collection. + - `*.bed`: BED versions of interval list file used for read count collection for long-reads. [GENS](https://github.com/Clinical-Genomics-Lund/gens) creates a panel of normals for read-count denoising to improve somatic variant detection. The workflow: (1) indexes BAM/CRAM files if needed, (2) collects read counts at specified intervals using GATK's CollectReadCounts, and (3) creates a panel of normals using GATK's CreateReadCountPanelOfNormals. This panel can be used with GENS for somatic variant calling to reduce technical noise and improve variant detection sensitivity. 
+When `gens_analysis_type` is set to 'lrs', a modified version of the workflow above is run where coverage calculated by mosdepth is used instead of read counts. + ### MultiQC
diff --git a/docs/usage.md b/docs/usage.md index ebe09a8..c9d6da6 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -131,6 +131,7 @@ If you are running the pipeline to generate references for the GENS workflow, yo | ------------- | --------------------- | | fasta/genomes | fai | | | dict | +| | gens_analysis_type | | | gens_bin_length | | | gens_pon_name | | | gens_readcount_format | @@ -138,6 +139,9 @@ If you are running the pipeline to generate references for the GENS workflow, yo The GENS workflow creates a panel of normals for read-count denoising from normal samples. This panel can be used with GENS for somatic variant calling to reduce technical noise and improve variant detection. For more information, see the [GENS documentation](https://github.com/Clinical-Genomics-Lund/gens). +> [!NOTE] +> If `--gens_analysis_type` is set to 'lrs', this requires the sample ID set in the sample sheet to be equal to the `SM` tag in the BAM-file. + ### germlinecnvcaller If you are running the pipeline to generate references for the GATK's germlinecnvcalling workflow, you should ensure that you have provided all the mandatory options specified in the table below. 
diff --git a/main.nf b/main.nf index 0ba1739..2b41c60 100644 --- a/main.nf +++ b/main.nf @@ -137,6 +137,7 @@ workflow { PIPELINE_INITIALISATION.out.samplesheet, params.tools ?: "no_tools", params.gcnv_model_name, + params.gens_analysis_type, params.gens_pon_name, params.mutect2_pon_name, fasta, @@ -232,6 +233,7 @@ workflow NFCORE_CREATEPANELREFS { samplesheet // channel: samplesheet read in from --input tools // string: comma separated list of tools to run gcnv_model_name // string: name of gcnv model + gens_analysis_type // string: type of analysis for gens pon ('lrs' or 'srs') gens_pon_name // string: name of gens pon mutect2_pon_name // string: name of mutect2 pon fasta // channel: [meta, fasta] @@ -250,7 +252,7 @@ workflow NFCORE_CREATEPANELREFS { main: // WORKFLOW: Run pipeline - CREATEPANELREFS(samplesheet, tools, gcnv_model_name, gens_pon_name, mutect2_pon_name, fasta, dict, fai, cnvkit_targets, gcnv_exclude_bed, gcnv_exclude_interval_list, gcnv_mappable_regions, gcnv_ploidy_priors, gcnv_segmental_duplications, gcnv_target_bed, gcnv_target_interval_list, gens_interval_list, mutect2_target_bed) + CREATEPANELREFS(samplesheet, tools, gcnv_model_name, gens_analysis_type, gens_pon_name, mutect2_pon_name, fasta, dict, fai, cnvkit_targets, gcnv_exclude_bed, gcnv_exclude_interval_list, gcnv_mappable_regions, gcnv_ploidy_priors, gcnv_segmental_duplications, gcnv_target_bed, gcnv_target_interval_list, gens_interval_list, mutect2_target_bed) emit: versions = CREATEPANELREFS.out.versions // channel: versions.yml diff --git a/modules.json b/modules.json index ef16fa5..96ef637 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "cat/cat": { + "branch": "master", + "git_sha": "69614d4579a6bd9b8a2ecffb35959809d9c36559", + "installed_by": ["modules"] + }, "cnvkit/batch": { "branch": "master", "git_sha": "09223d6de1dab602242c4c57ab2a4599d460e528", @@ -85,6 +90,11 @@ "git_sha": 
"41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", "installed_by": ["modules"] }, + "mosdepth": { + "branch": "master", + "git_sha": "6832b69ef7f98c54876d6436360b6b945370c615", + "installed_by": ["modules"] + }, "multiqc": { "branch": "master", "git_sha": "af27af1be706e6a2bb8fe454175b0cdf77f47b49", diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml new file mode 100644 index 0000000..9851176 --- /dev/null +++ b/modules/nf-core/cat/cat/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf new file mode 100644 index 0000000..aa72fc4 --- /dev/null +++ b/modules/nf-core/cat/cat/main.nf @@ -0,0 +1,78 @@ +process CAT_CAT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pigz:2.8' : + 'biocontainers/pigz:2.8' }" + + input: + tuple val(meta), path(files_in) + + output: + tuple val(meta), path("${prefix}"), emit: file_out + tuple val("${task.process}"), val("pigz"), eval("pigz --version 2>&1 | sed 's/pigz //g'"), topic: versions, emit: versions_cat + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + + // choose appropriate concatenation tool depending on input and output format + + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + $command1 \\ + $args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." 
+ } + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} + +// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) +} diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml new file mode 100644 index 0000000..36a7359 --- /dev/null +++ b/modules/nf-core/cat/cat/meta.yml @@ -0,0 +1,63 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + documentation: https://man7.org/linux/man-pages/man1/cat.1.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" + ontologies: [] +output: + file_out: + - - meta: + type: map + description: Groovy Map containing sample information + - ${prefix}: + type: file + description: Concatenated file. 
Will be gzipped if file_out ends with + ".gz" + pattern: "${file_out}" + ontologies: [] + versions_cat: + - - ${task.process}: + type: string + description: The name of the process + - pigz: + type: string + description: The name of the tool + - "pigz --version 2>&1 | sed 's/pigz //g'": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - pigz: + type: string + description: The name of the tool + - "pigz --version 2>&1 | sed 's/pigz //g'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 0000000..030c664 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,192 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("sarscov2 - genome - error: name conflict") { + when { + process { + """ + input[0] = + [ + [ id:'genome', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ fasta, sizes ] - unzipped") { + when { + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: 
true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("sarscov2 - [ gff3_gz, maf_gz ] - zipped") { + when { + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() + } + ) + } + } + + test("sarscov2 - [ gff3_gz, maf_gz ] - unzipped") { + config './nextflow.config' + + when { + params { + cat_prefix = "cat.txt" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ fasta, sizes ] - zipped") { + config './nextflow.config' + + when { + params { + cat_prefix = "cat.txt.gz" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - fasta - zipped") { + config './nextflow.config' + + when { + params { + cat_prefix = "cat.txt.gz" + } + 
process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - fasta - unzipped - stub") { + options "-stub" + + when { + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 0000000..5b4e4cc --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,283 @@ +{ + "sarscov2 - [ gff3_gz, maf_gz ] - unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T09:08:31.479828" + }, + "sarscov2 - fasta - unzipped - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": 
"2025-12-10T16:16:28.118094" + }, + "sarscov2 - [ fasta, sizes ] - zipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T16:15:56.529595" + }, + "sarscov2 - genome - error: name conflict": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "file_out": [ + + ], + "versions_cat": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T16:14:54.496538" + }, + "sarscov2 - [ fasta, sizes ] - unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T11:26:29.942203" + }, + "sarscov2 - [ gff3_gz, maf_gz ] - zipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T11:26:45.679401" + }, + "sarscov2 - fasta - zipped": { + "content": 
[ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,6e9fe4042a72f2345f644f239272b7e6" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,6e9fe4042a72f2345f644f239272b7e6" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T16:16:12.439911" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/cat/tests/nextflow.config b/modules/nf-core/cat/cat/tests/nextflow.config new file mode 100644 index 0000000..5bc9bf5 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = "${params.cat_prefix}" + } +} diff --git a/modules/nf-core/mosdepth/environment.yml b/modules/nf-core/mosdepth/environment.yml new file mode 100644 index 0000000..1c7f3ee --- /dev/null +++ b/modules/nf-core/mosdepth/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/mosdepth + - htslib=1.22.1 + - mosdepth=0.3.11=h0ec343a_1 diff --git a/modules/nf-core/mosdepth/main.nf b/modules/nf-core/mosdepth/main.nf new file mode 100644 index 0000000..63739bf --- /dev/null +++ b/modules/nf-core/mosdepth/main.nf @@ -0,0 +1,77 @@ +process MOSDEPTH { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/00/00d32b53160c26794959da7303ee6e2107afd4d292060c9f287b0af1fddbd847/data' : + 'community.wave.seqera.io/library/mosdepth_htslib:0f58993cb6d93294'}" + + input: + tuple val(meta), path(bam), path(bai), path(bed) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path('*.global.dist.txt') , emit: global_txt + tuple val(meta), path('*.summary.txt') , emit: summary_txt + tuple val(meta), path('*.region.dist.txt') , optional:true, emit: regions_txt + tuple val(meta), path('*.per-base.d4') , optional:true, emit: per_base_d4 + tuple val(meta), path('*.per-base.bed.gz') , optional:true, emit: per_base_bed + tuple val(meta), path('*.per-base.bed.gz.csi') , optional:true, emit: per_base_csi + tuple val(meta), path('*.regions.bed.gz') , optional:true, emit: regions_bed + tuple val(meta), path('*.regions.bed.gz.csi') , optional:true, emit: regions_csi + tuple val(meta), path('*.quantized.bed.gz') , optional:true, emit: quantized_bed + tuple val(meta), path('*.quantized.bed.gz.csi') , optional:true, emit: quantized_csi + tuple val(meta), path('*.thresholds.bed.gz') , optional:true, emit: thresholds_bed + tuple val(meta), path('*.thresholds.bed.gz.csi'), optional:true, emit: thresholds_csi + tuple val("${task.process}"), val('mosdepth'), eval("mosdepth --version | sed 's/mosdepth //g'"), topic: versions, emit: versions_mosdepth + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--fasta ${fasta}" : "" + def interval = bed ? "--by ${bed}" : "" + if (bed && (args.contains("--by") || args.contains("-b "))) { + error "'--by' can only be specified once when running mosdepth! 
Either remove input BED file definition or remove '--by' from 'ext.args' definition" + } + if (args.contains("--thresholds") && !(bed || args.contains("--by") || args.contains("-b "))) { + error "'--thresholds' can only be specified in conjunction with '--by' or an input bed file" + } + + """ + mosdepth \\ + --threads $task.cpus \\ + $interval \\ + $reference \\ + $args \\ + $prefix \\ + $bam + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (bed && (args.contains("--by") || args.contains("-b "))) { + error "'--by' can only be specified once when running mosdepth! Either remove input BED file definition or remove '--by' from 'ext.args' definition" + } + if (args.contains("--thresholds") && !(bed || args.contains("--by") || args.contains("-b "))) { + error "'--thresholds' can only be specified in conjunction with '--by' or an input bed file" + } + """ + touch ${prefix}.global.dist.txt + touch ${prefix}.region.dist.txt + touch ${prefix}.summary.txt + touch ${prefix}.per-base.d4 + echo "" | gzip > ${prefix}.per-base.bed.gz + touch ${prefix}.per-base.bed.gz.csi + echo "" | gzip > ${prefix}.regions.bed.gz + touch ${prefix}.regions.bed.gz.csi + echo "" | gzip > ${prefix}.quantized.bed.gz + touch ${prefix}.quantized.bed.gz.csi + echo "" | gzip > ${prefix}.thresholds.bed.gz + touch ${prefix}.thresholds.bed.gz.csi + """ +} diff --git a/modules/nf-core/mosdepth/meta.yml b/modules/nf-core/mosdepth/meta.yml new file mode 100644 index 0000000..04c8bfe --- /dev/null +++ b/modules/nf-core/mosdepth/meta.yml @@ -0,0 +1,211 @@ +name: mosdepth +description: Calculates genome-wide sequencing coverage. +keywords: + - mosdepth + - bam + - cram + - coverage +tools: + - mosdepth: + description: | + Fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. 
+ documentation: https://github.com/brentp/mosdepth + doi: 10.1093/bioinformatics/btx699 + licence: ["MIT"] + identifier: biotools:mosdepth +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Input BAM/CRAM file + pattern: "*.{bam,cram}" + ontologies: [] + - bai: + type: file + description: Index for BAM/CRAM file + pattern: "*.{bai,crai}" + ontologies: [] + - bed: + type: file + description: BED file with intersected intervals + pattern: "*.{bed}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing bed information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta}" + ontologies: [] +output: + global_txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.global.dist.txt": + type: file + description: Text file with global cumulative coverage distribution + pattern: "*.{global.dist.txt}" + ontologies: [] + summary_txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.summary.txt": + type: file + description: Text file with summary mean depths per chromosome and regions + pattern: "*.{summary.txt}" + ontologies: [] + regions_txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.region.dist.txt": + type: file + description: Text file with region cumulative coverage distribution + pattern: "*.{region.dist.txt}" + ontologies: [] + per_base_d4: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.per-base.d4": + type: file + description: D4 file with per-base coverage + pattern: "*.{per-base.d4}" + ontologies: [] + per_base_bed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.per-base.bed.gz": + type: file + description: BED file with per-base coverage + pattern: "*.{per-base.bed.gz}" + ontologies: [] + per_base_csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.per-base.bed.gz.csi": + type: file + description: Index file for BED file with per-base coverage + pattern: "*.{per-base.bed.gz.csi}" + ontologies: [] + regions_bed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.regions.bed.gz": + type: file + description: BED file with per-region coverage + pattern: "*.{regions.bed.gz}" + ontologies: [] + regions_csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.regions.bed.gz.csi": + type: file + description: Index file for BED file with per-region coverage + pattern: "*.{regions.bed.gz.csi}" + ontologies: [] + quantized_bed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.quantized.bed.gz": + type: file + description: BED file with binned coverage + pattern: "*.{quantized.bed.gz}" + ontologies: [] + quantized_csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.quantized.bed.gz.csi": + type: file + description: Index file for BED file with binned coverage + pattern: "*.{quantized.bed.gz.csi}" + ontologies: [] + thresholds_bed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.thresholds.bed.gz": + type: file + description: BED file with the number of bases in each region that are covered + at or above each threshold + pattern: "*.{thresholds.bed.gz}" + ontologies: [] + thresholds_csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.thresholds.bed.gz.csi": + type: file + description: Index file for BED file with threshold coverage + pattern: "*.{thresholds.bed.gz.csi}" + ontologies: [] + versions_mosdepth: + - - ${task.process}: + type: string + description: The process the versions were collected from + - mosdepth: + type: string + description: The tool name + - "mosdepth --version | sed 's/mosdepth //g'": + type: string + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - mosdepth: + type: string + description: The tool name + - "mosdepth --version | sed 's/mosdepth //g'": + type: string + description: The command used to generate the version of the tool +authors: + - "@joseespinosa" + - "@drpatelh" + - "@ramprasadn" + - "@matthdsm" +maintainers: + - "@joseespinosa" + - "@ramprasadn" + - "@matthdsm" diff --git a/modules/nf-core/mosdepth/tests/main.nf.test b/modules/nf-core/mosdepth/tests/main.nf.test new file mode 100644 index 0000000..b05dde5 --- /dev/null +++ b/modules/nf-core/mosdepth/tests/main.nf.test @@ -0,0 +1,268 @@ +nextflow_process { + + name "Test Process MOSDEPTH" + script "../main.nf" + process "MOSDEPTH" + + tag "modules" + tag "modules_nfcore" + tag "mosdepth" + config "./nextflow.config" + + test("homo_sapiens - bam, bai, []") { + + when { + params { + module_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: 
true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - bam, bai, bed") { + + when { + params { + module_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - cram, crai, []") { + + when { + params { + module_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - cram, crai, bed") { + + when { + params { + module_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - bam, bai, [] - window") { + + when { + params { + module_args = "--by 100" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - bam, bai, [] - quantized") { + + when { + params { + module_args = "--quantize 0:1:4:100:200" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - bam, bai, bed - thresholds") { + + when { + params { + module_args = "--thresholds 1,10,20,30" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - bam, bai, bed - fail") { + + when { + params { + module_args = "--by 100" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.failed + } + + } + + test("homo_sapiens - bam, bai, [] - stub") { + + options "-stub" + when { + params { + module_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + +} diff --git a/modules/nf-core/mosdepth/tests/main.nf.test.snap b/modules/nf-core/mosdepth/tests/main.nf.test.snap new file mode 100644 index 0000000..c27fcc7 --- /dev/null +++ b/modules/nf-core/mosdepth/tests/main.nf.test.snap @@ -0,0 +1,1450 @@ +{ + "homo_sapiens - bam, bai, [] - stub": { + 
"content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.global.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.region.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.d4:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.global.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + 
}, + "test.per-base.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "per_base_d4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.d4:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "quantized_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "quantized_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.region.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "thresholds_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "thresholds_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:06:13.219131" + }, + "homo_sapiens - cram, crai, bed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + [ + { + 
"id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + 
"timestamp": "2025-09-23T13:22:14.011309" + }, + "homo_sapiens - bam, bai, [] - quantized": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,f037c215449d361112efc10108fcc17c" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,c0a3176a59010639455a4aefb3f247ef" + ] + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,f037c215449d361112efc10108fcc17c" + ] + ], + "quantized_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,c0a3176a59010639455a4aefb3f247ef" + ] + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:22:22.818082" + }, + "homo_sapiens - bam, bai, bed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": 
[ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:22:04.449943" + }, + "homo_sapiens - bam, bai, [] - window": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,0b6ea9f0da1228252d9aef2d3b6f7f76" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,34f48d16fcdd61e44d812e29e02c77b8" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,17a2cbe22a948d7c004b90a1f28347a1" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": 
"test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,34f48d16fcdd61e44d812e29e02c77b8" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,17a2cbe22a948d7c004b90a1f28347a1" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,0b6ea9f0da1228252d9aef2d3b6f7f76" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:22:18.435089" + }, + "homo_sapiens - bam, bai, []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:21:59.785829" + }, + "homo_sapiens - cram, crai, []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:22:09.294766" + }, + "homo_sapiens - bam, bai, bed - thresholds": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,fe70ae728cd10726c42a2bcd44adfc9d" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,2c52ab89e7496af475de3cb2ca04c7b3" + ] + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,fe70ae728cd10726c42a2bcd44adfc9d" + ] + ], + "thresholds_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,2c52ab89e7496af475de3cb2ca04c7b3" + ] + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:22:27.300204" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/mosdepth/tests/nextflow.config b/modules/nf-core/mosdepth/tests/nextflow.config new file mode 100644 index 0000000..b21c05b --- /dev/null +++ b/modules/nf-core/mosdepth/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: "MOSDEPTH" { + ext.args = params.module_args + } +} diff --git a/nextflow.config b/nextflow.config index 2fcd3c2..fbbc363 100644 --- a/nextflow.config +++ b/nextflow.config @@ -37,6 +37,7 @@ params { gens_min_interval_median_percentile = 5.0 gens_pon_name = 'gens' gens_readcount_format = 'HDF5' + gens_analysis_type = 'srs' // CNVkit options cnvkit_targets = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 0b43a3f..458cfdf 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -217,6 +217,12 @@ "description": "Options used by the gens subworkflow", "default": "", "properties": { + "gens_analysis_type": { + "type": "string", + "description": "GENS panel of normals analysis type ('srs' or 'lrs').", + "default": "srs", + "enum": ["srs", "lrs"] + }, "gens_bin_length": { "type": "number", "default": 100, diff --git a/subworkflows/local/gens_pon/main.nf b/subworkflows/local/gens_pon/main.nf index 99fb8e7..db3c2f1 100644 --- a/subworkflows/local/gens_pon/main.nf +++ b/subworkflows/local/gens_pon/main.nf @@ -1,10 +1,17 @@ include { GATK4_COLLECTREADCOUNTS } from '../../../modules/nf-core/gatk4/collectreadcounts' include { GATK4_CREATEREADCOUNTPANELOFNORMALS } from '../../../modules/nf-core/gatk4/createreadcountpanelofnormals' include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index' +include { SAMTOOLS_VIEW } from '../../../modules/nf-core/samtools/view' +include { MOSDEPTH } from '../../../modules/nf-core/mosdepth' +include { GAWK as MOSDEPTH_GATK_HEADER } from '../../../modules/nf-core/gawk' +include { GAWK as MOSDEPTH_GATK_FORMAT } from '../../../modules/nf-core/gawk' +include { GAWK as INTERVAL_LIST_TO_BED } from '../../../modules/nf-core/gawk' +include { CAT_CAT } from 
'../../../modules/nf-core/cat/cat' workflow GENS_PON { take: ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] + val_analysis_type // string: [mandatory] type of analysis ('lrs' or 'srs') val_pon_name // string: [optional] name for panel of normals ch_dict // channel: [optional] [ val(meta), path(dict) ] ch_fai // channel: [optional] [ val(meta), path(fai) ] @@ -13,6 +20,7 @@ workflow GENS_PON { main: versions = channel.empty() + ch_readcounts_out = channel.empty() // Filter out files that lack indices, and generate them ch_input @@ -25,6 +33,7 @@ workflow GENS_PON { .set { ch_for_mix } SAMTOOLS_INDEX(ch_for_mix.alignment_without_index) + versions = versions.mix(SAMTOOLS_INDEX.out.versions) SAMTOOLS_INDEX.out.bai .mix(SAMTOOLS_INDEX.out.crai) @@ -34,25 +43,89 @@ workflow GENS_PON { ch_for_mix.alignment_without_index .join(ch_index) .mix(ch_for_mix.alignment_with_index) - .combine(ch_interval_list.map { it -> it[1] }) - .set { ch_readcounts_in } + .set { ch_bam_bai } + + if (val_analysis_type == 'srs') { + ch_bam_bai + .combine(ch_interval_list.map { _meta, interval_list -> interval_list }) + .set { ch_readcounts_in } + + // Collect read counts, and generate models + GATK4_COLLECTREADCOUNTS(ch_readcounts_in, ch_fasta, ch_fai, ch_dict) + versions = versions.mix(GATK4_COLLECTREADCOUNTS.out.versions) + + GATK4_COLLECTREADCOUNTS.out.tsv + .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) + .set { ch_readcounts } - // Collect read counts, and generate models - GATK4_COLLECTREADCOUNTS(ch_readcounts_in, ch_fasta, ch_fai, ch_dict) + } else if (val_analysis_type == 'lrs') { - GATK4_COLLECTREADCOUNTS.out.tsv - .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) - .collect { it[1] } - .map { it -> - return [[id: val_pon_name], it] + INTERVAL_LIST_TO_BED( + ch_interval_list, [], [] + ) + versions = versions.mix(INTERVAL_LIST_TO_BED.out.versions) + + ch_bam_bai + .combine(INTERVAL_LIST_TO_BED.out.output) + .map { meta, bam, bai, _bins_meta, bins -> + [meta, bam, bai, bins] 
} - .set { ch_readcounts_out } + .set { ch_mosdepth_in } + + // Prepare the body + MOSDEPTH( + ch_mosdepth_in, + [[],[]] + ) + + // Prepare the header + SAMTOOLS_VIEW( + ch_bam_bai, + [[],[]], + [], + false + ) + versions = versions.mix(SAMTOOLS_VIEW.out.versions) + + MOSDEPTH_GATK_HEADER( + SAMTOOLS_VIEW.out.sam, + [], + false + ) + versions = versions.mix(MOSDEPTH_GATK_HEADER.out.versions) - GATK4_CREATEREADCOUNTPANELOFNORMALS(ch_readcounts_out) - versions = versions.mix(GATK4_COLLECTREADCOUNTS.out.versions) + MOSDEPTH_GATK_FORMAT( + MOSDEPTH.out.regions_bed, + [], + false + ) + versions = versions.mix(MOSDEPTH_GATK_FORMAT.out.versions) + + // Prepare GATK inputs + MOSDEPTH_GATK_HEADER.out.output + .join(MOSDEPTH_GATK_FORMAT.out.output) + .map { meta, header, body -> [meta, [header, body]] } + .set { ch_cat_in } + + CAT_CAT(ch_cat_in) + + CAT_CAT.out.file_out + .map { meta, gatk_input -> + return [meta, gatk_input] + } + .set { ch_readcounts } + + } + + ch_readcounts + .collect { _meta, readcounts -> readcounts } + .map { readcounts -> [[id: val_pon_name], readcounts] } + .set { ch_create_pon_in } + + GATK4_CREATEREADCOUNTPANELOFNORMALS(ch_create_pon_in) + versions = versions.mix(GATK4_CREATEREADCOUNTPANELOFNORMALS.out.versions) - versions = versions.mix(SAMTOOLS_INDEX.out.versions) emit: genspon = GATK4_CREATEREADCOUNTPANELOFNORMALS.out.pon diff --git a/tests/csv/1.0.0/bam.csv b/tests/csv/1.0.0/bam.csv index b95f604..47a938f 100644 --- a/tests/csv/1.0.0/bam.csv +++ b/tests/csv/1.0.0/bam.csv @@ -1,3 +1,3 @@ sample,bam,bai -sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai 
-sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai +normal,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai +tumour,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai diff --git a/tests/csv/1.0.0/bam_sorted.csv b/tests/csv/1.0.0/bam_sorted.csv index 46428ed..f3fd62d 100644 --- a/tests/csv/1.0.0/bam_sorted.csv +++ b/tests/csv/1.0.0/bam_sorted.csv @@ -1,3 +1,3 @@ sample,bam,bai -sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai -sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai +testN,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai 
+testT,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai diff --git a/tests/gens_pon.nf.test b/tests/gens_pon.nf.test index 2411c02..fed4001 100644 --- a/tests/gens_pon.nf.test +++ b/tests/gens_pon.nf.test @@ -21,6 +21,41 @@ nextflow_pipeline { } } + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assert workflow.success + assertAll( + { assert snapshot( + // pipeline versions.yml file for multiqc from which Nextflow and pipeline versions are removed (all from the workflow key) + removeFromYamlMap("${outputDir}/pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", "Workflow"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } + + test("-profile test --tools gens --input tests/csv/1.0.0/bam_sorted.csv --gens_bin_length 100 --gens_pon_name gens_pon --gens_readcount_format TSV --gens_analysis_type lrs") { + + when { + params { + genome = 'GRCh38.chr22.testdata' + gens_analysis_type = 'lrs' + gens_bin_length = 100 + gens_interval_list = null + gens_pon_name = 'gens_pon' + gens_readcount_format = "TSV" + input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" + outdir = "$outputDir" + tools = 'gens' + } + } + then { // stable_name: All files + folders in ${params.outdir}/ with a stable name def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) diff --git 
a/tests/gens_pon.nf.test.snap b/tests/gens_pon.nf.test.snap index 357d60d..1e76976 100644 --- a/tests/gens_pon.nf.test.snap +++ b/tests/gens_pon.nf.test.snap @@ -1,4 +1,59 @@ { + "-profile test --tools gens --input tests/csv/1.0.0/bam_sorted.csv --gens_bin_length 100 --gens_pon_name gens_pon --gens_readcount_format TSV --gens_analysis_type lrs": { + "content": [ + { + "GATK4_CREATEREADCOUNTPANELOFNORMALS": { + "gatk4": "4.6.2.0" + }, + "GATK4_PREPROCESSINTERVALS_GENS": { + "gatk4": "4.6.2.0" + }, + "INTERVAL_LIST_TO_BED": { + "gawk": "5.3.0" + }, + "MOSDEPTH_GATK_FORMAT": { + "gawk": "5.3.0" + }, + "MOSDEPTH_GATK_HEADER": { + "gawk": "5.3.0" + }, + "SAMTOOLS_VIEW": { + "samtools": "1.22.1" + } + }, + [ + "gens_pon", + "gens_pon/createreadcountpanelofnormals", + "gens_pon/createreadcountpanelofnormals/gens_pon.hdf5", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", + "references", + "references/intervals", + "references/intervals/gens_pon", + "references/intervals/gens_pon/genome.interval_list", + "references/intervals/gens_pon/gens_coverage_bins.bed" + ], + [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "gens_coverage_bins.bed:md5,b5c7f328aaf419595302baaa16f5b649" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.12.0" + }, + "timestamp": "2026-02-06T14:48:19.423593928" + }, "-profile test --tools gens --input tests/csv/1.0.0/bam_sorted.csv --gens_bin_length 100 --gens_pon_name gens_pon --gens_readcount_format TSV": { "content": [ { @@ -17,8 +72,8 @@ "gens_pon/createreadcountpanelofnormals", 
"gens_pon/createreadcountpanelofnormals/gens_pon.hdf5", "gens_pon/readcounts", - "gens_pon/readcounts/sample1.tsv", - "gens_pon/readcounts/sample2.tsv", + "gens_pon/readcounts/testN.tsv", + "gens_pon/readcounts/testT.tsv", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/llms-full.txt", @@ -37,15 +92,15 @@ "references/intervals/gens_pon/genome.interval_list" ], [ - "sample1.tsv:md5,bfceab35109b04317f38f9cc1cde71ca", - "sample2.tsv:md5,7141d08cdc26f6057557be9e23ef4365", + "testN.tsv:md5,bfceab35109b04317f38f9cc1cde71ca", + "testT.tsv:md5,7141d08cdc26f6057557be9e23ef4365", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.0" + "nextflow": "25.12.0" }, - "timestamp": "2025-10-31T14:33:30.730355032" + "timestamp": "2026-02-06T14:49:45.524921421" } } \ No newline at end of file diff --git a/tests/germlinecnvcaller_cohort.nf.test.snap b/tests/germlinecnvcaller_cohort.nf.test.snap index 35037b8..90d9d7b 100644 --- a/tests/germlinecnvcaller_cohort.nf.test.snap +++ b/tests/germlinecnvcaller_cohort.nf.test.snap @@ -130,8 +130,8 @@ "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/std_log_mean_bias_t.tsv", "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/std_psi_t_log__.tsv", "germlinecnvcaller/readcounts", - "germlinecnvcaller/readcounts/sample1.hdf5", - "germlinecnvcaller/readcounts/sample2.hdf5", + "germlinecnvcaller/readcounts/testN.hdf5", + "germlinecnvcaller/readcounts/testT.hdf5", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/llms-full.txt", @@ -173,8 +173,8 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.0" + "nextflow": "25.12.0" }, - "timestamp": "2025-10-31T15:58:51.382285872" + "timestamp": "2026-02-06T15:12:44.760563402" } } \ No newline at end of file diff --git a/tests/mutect2.nf.test.snap b/tests/mutect2.nf.test.snap index e1c5de8..eba04d6 100644 --- a/tests/mutect2.nf.test.snap +++ b/tests/mutect2.nf.test.snap @@ 
-75,12 +75,9 @@ }, [ "gatk4", - "gatk4/sample1.vcf.gz", - "gatk4/sample1.vcf.gz.stats", - "gatk4/sample1.vcf.gz.tbi", - "gatk4/sample2.vcf.gz", - "gatk4/sample2.vcf.gz.stats", - "gatk4/sample2.vcf.gz.tbi", + "gatk4/normal.vcf.gz", + "gatk4/normal.vcf.gz.stats", + "gatk4/normal.vcf.gz.tbi", "gatk4/test", "gatk4/test.vcf.gz", "gatk4/test.vcf.gz.tbi", @@ -91,6 +88,9 @@ "gatk4/test/chr21$25689498$46709983", "gatk4/test/vcfheader.vcf", "gatk4/test/vidmap.json", + "gatk4/tumour.vcf.gz", + "gatk4/tumour.vcf.gz.stats", + "gatk4/tumour.vcf.gz.tbi", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/llms-full.txt", @@ -105,15 +105,15 @@ "pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml" ], [ - "sample1.vcf.gz.stats:md5,b569ce66bbffe9588b3d221e821023ee", - "sample2.vcf.gz.stats:md5,76f749c53212d72e98801f6030fbf8a6", + "normal.vcf.gz.stats:md5,b569ce66bbffe9588b3d221e821023ee", + "tumour.vcf.gz.stats:md5,76f749c53212d72e98801f6030fbf8a6", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.0" + "nextflow": "25.12.0" }, - "timestamp": "2025-10-31T15:08:35.629022911" + "timestamp": "2026-02-06T11:30:00.137671531" } } \ No newline at end of file diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 99a91da..e7b3cd9 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -15,6 +15,7 @@ workflow CREATEPANELREFS { samplesheet // channel: samplesheet read in from --input tools // array: tools to run, or no_tools if none (it's actually comma separated values string, but close enough) gcnv_model_name // string: name of gcnv model + gens_analysis_type // string: type of analysis for gens pon ('lrs' or 'srs') gens_pon_name // string: name of gens pon mutect2_pon_name // string: name of mutect2 pon fasta // channel: [meta, fasta] @@ -123,6 +124,7 @@ workflow CREATEPANELREFS { GENS_PON( gens_input, + gens_analysis_type, gens_pon_name, dict, fai,