From 56733ffe0e96caee6904d07e7a13327fa506f835 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 09:35:10 +0200 Subject: [PATCH 001/234] remove unused modules/subworkflows --- modules/local/samplesheet_check.nf | 31 --------------------- subworkflows/local/input_check.nf | 44 ------------------------------ 2 files changed, 75 deletions(-) delete mode 100644 modules/local/samplesheet_check.nf delete mode 100644 subworkflows/local/input_check.nf diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index 7ae3540..0000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,31 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - label 'process_single' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/createpanelrefs/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index 0aecf87..0000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,44 +0,0 @@ -// -// Check input samplesheet and get read channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.csv - - main: - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - .set { reads 
} - - emit: - reads // channel: [ val(meta), [ reads ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] -} - -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() - - // add path(s) of the fastq file(s) to the meta map - def fastq_meta = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - if (meta.single_end) { - fastq_meta = [ meta, [ file(row.fastq_1) ] ] - } else { - if (!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" - } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] - } - return fastq_meta -} From 3e4c7c9797a8f029944fd25b6a52c94693b5131a Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 09:35:45 +0200 Subject: [PATCH 002/234] add cnvkit --- modules.json | 5 ++ modules/nf-core/cnvkit/batch/main.nf | 105 ++++++++++++++++++++++++++ modules/nf-core/cnvkit/batch/meta.yml | 86 +++++++++++++++++++++ 3 files changed, 196 insertions(+) create mode 100644 modules/nf-core/cnvkit/batch/main.nf create mode 100644 modules/nf-core/cnvkit/batch/meta.yml diff --git a/modules.json b/modules.json index fa09921..89ff874 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "cnvkit/batch": { + "branch": "master", + "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "installed_by": ["modules"] + }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "76cc4938c1f6ea5c7d83fed1eeffc146787f9543", diff --git a/modules/nf-core/cnvkit/batch/main.nf b/modules/nf-core/cnvkit/batch/main.nf new file mode 100644 index 0000000..1e4d81e --- /dev/null +++ b/modules/nf-core/cnvkit/batch/main.nf @@ -0,0 
+1,105 @@ +process CNVKIT_BATCH { + tag "$meta.id" + label 'process_low' + + conda "bioconda::cnvkit=0.9.9 bioconda::samtools=1.16.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:3bdd798e4b9aed6d3e1aaa1596c913a3eeb865cb-0' : + 'biocontainers/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:3bdd798e4b9aed6d3e1aaa1596c913a3eeb865cb-0' }" + + input: + tuple val(meta), path(tumor), path(normal) + path fasta + path fasta_fai + path targets + path reference + val panel_of_normals + + output: + tuple val(meta), path("*.bed"), emit: bed + tuple val(meta), path("*.cnn"), emit: cnn, optional: true + tuple val(meta), path("*.cnr"), emit: cnr, optional: true + tuple val(meta), path("*.cns"), emit: cns, optional: true + tuple val(meta), path("*.pdf"), emit: pdf, optional: true + tuple val(meta), path("*.png"), emit: png, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def tumor_exists = tumor ? true : false + def normal_exists = normal ? true : false + + // execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow + def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false + def normal_cram = normal_exists && normal.Extension == "cram" ? true : false + def tumor_bam = tumor_exists && tumor.Extension == "bam" ? true : false + def normal_bam = normal_exists && normal.Extension == "bam" ? true : false + + def tumor_out = tumor_cram ? tumor.BaseName + ".bam" : "${tumor}" + + // tumor_only mode does not need fasta & target + // instead it requires a pre-computed reference.cnn which is built from fasta & target + def (normal_out, normal_args, fasta_args) = ["", "", ""] + def fai_reference = fasta_fai ? 
"--fai-reference ${fasta_fai}" : "" + + if (normal_exists){ + def normal_prefix = normal.BaseName + normal_out = normal_cram ? "${normal_prefix}" + ".bam" : "${normal}" + fasta_args = fasta ? "--fasta $fasta" : "" + + // germline mode + // normal samples must be input without a flag + // requires flag --normal to be empty [] + if(!tumor_exists){ + tumor_out = "${normal_prefix}" + ".bam" + normal_args = "--normal " + } + // somatic mode + else { + normal_args = normal_prefix ? "--normal $normal_out" : "" + } + } + + // generation of panel of normals + def generate_pon = panel_of_normals ? true : false + + if (generate_pon && !tumor_exists){ + def pon_input = normal.join(' ') + normal_args = "--normal $pon_input" + tumor_out = "" + } + + def target_args = targets ? "--targets $targets" : "" + def reference_args = reference ? "--reference $reference" : "" + + def samtools_cram_convert = '' + samtools_cram_convert += normal_cram ? " samtools view -T $fasta $fai_reference $normal -@ $task.cpus -o $normal_out\n" : '' + samtools_cram_convert += normal_cram ? " samtools index $normal_out\n" : '' + samtools_cram_convert += tumor_cram ? " samtools view -T $fasta $fai_reference $tumor -@ $task.cpus -o $tumor_out\n" : '' + samtools_cram_convert += tumor_cram ? " samtools index $tumor_out\n" : '' + def versions = normal_cram || tumor_cram ? 
+ "samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')\n cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g')" : + "cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g')" + """ + $samtools_cram_convert + + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ${versions} + END_VERSIONS + """ +} diff --git a/modules/nf-core/cnvkit/batch/meta.yml b/modules/nf-core/cnvkit/batch/meta.yml new file mode 100644 index 0000000..3fc00f1 --- /dev/null +++ b/modules/nf-core/cnvkit/batch/meta.yml @@ -0,0 +1,86 @@ +name: cnvkit_batch +description: Copy number variant detection from high-throughput sequencing data +keywords: + - cnvkit + - bam + - fasta + - copy number +tools: + - cnvkit: + description: | + CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent. + homepage: https://cnvkit.readthedocs.io/en/stable/index.html + documentation: https://cnvkit.readthedocs.io/en/stable/index.html + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - tumour: + type: file + description: | + Input tumour sample bam file (or cram) + - normal: + type: file + description: | + Input normal sample bam file (or cram) + - fasta: + type: file + description: | + Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided) + - fasta_fai: + type: file + description: | + Input reference genome fasta index (optional, but recommended for cram_input) + - targetfile: + type: file + description: | + Input target bed file + - reference: + type: file + description: | + Input reference cnn-file (only for germline and tumor-only running) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bed: + type: file + description: File containing genomic regions + pattern: "*.{bed}" + - cnn: + type: file + description: File containing coverage information + pattern: "*.{cnn}" + - cnr: + type: file + description: File containing copy number ratio information + pattern: "*.{cnr}" + - cns: + type: file + description: File containing copy number segment information + pattern: "*.{cns}" + - pdf: + type: file + description: File with plot of copy numbers or segments on chromosomes + pattern: "*.{pdf}" + - png: + type: file + description: File with plot of bin-level log2 coverages and segmentation calls + pattern: "*.{png}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kaurravneet4123" + - "@KevinMenden" + - "@MaxUlysse" + - "@drpatelh" + - "@fbdtemme" + - "@lassefolkersen" + - "@SusiJo" From fa5b1799c2e90e32de2d5cf732692ac06ae26065 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 12:20:57 +0200 Subject: [PATCH 003/234] update CHANGELOG --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 92e5780..f1f6bc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 
+9,9 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n ### `Added` +- `CNVKIT` can be used to create a PON +- Usage of nf-validation + ### `Fixed` ### `Dependencies` From 95806eb5ddbfcb229f2cbbe8f209ba90aea6b66a Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 12:21:14 +0200 Subject: [PATCH 004/234] update CITATIONS --- CITATIONS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CITATIONS.md b/CITATIONS.md index 9b55cf7..7d9a92a 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,9 +10,9 @@ ## Pipeline tools -- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) +- [CNVKIT](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004873) - > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. + > Talevich E, Shain AH, Botton T, Bastian BC (2016) CNVkit: Genome-Wide Copy Number Detection and Visualization from Targeted DNA Sequencing. PLoS Comput Biol 12(4): e1004873. doi: 10.1371/journal.pcbi.1004873. PubMed PMID: 27100738. PubMed Central PMCID: PMC4839673. - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) From 623601bf49c018fd226b27813d4e4e6f6d36e7c6 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 12:21:35 +0200 Subject: [PATCH 005/234] update README --- README.md | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index e053d15..70f8911 100644 --- a/README.md +++ b/README.md @@ -12,20 +12,11 @@ ## Introduction -**nf-core/createpanelrefs** is a bioinformatics pipeline that ... - - - - - +**nf-core/createpanelrefs** is a bioinformatics helper pipeline that will help in creating panel of normals and other models. 1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +2. 
Build Panel of Normals for [`CNVKIT`](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004873) +3. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) ## Usage @@ -34,9 +25,6 @@ > to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) > with `-profile test` before running the workflow on actual data. - - Now, you can run the pipeline using: - - ```bash nextflow run nf-core/createpanelrefs \ -profile \ @@ -77,6 +61,9 @@ For more details about the output files and reports, please refer to the ## Credits nf-core/createpanelrefs was originally written by @maxulysse. +@marrip contributed in the idea that started it all. +@matthdsm and @FriederikeHanssen contributed in the actual design. +@ramprasadn's interest was the final push that led to the creation. We thank the following people for their extensive assistance in the development of this pipeline: @@ -90,11 +77,8 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations - - - An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. 
You can cite the `nf-core` publication as follows: From 827ec0cd6ed27883d8ed7a017b68a7fbb39dd37b Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 12:21:48 +0200 Subject: [PATCH 006/234] better igenomes.config --- conf/igenomes.config | 98 +++++++++++++++++++++++++++++--------------- 1 file changed, 65 insertions(+), 33 deletions(-) diff --git a/conf/igenomes.config b/conf/igenomes.config index 3f11437..6464acf 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -11,51 +11,86 @@ params { // illumina iGenomes reference file paths genomes { - 'GRCh37' { - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" + 'GATK.GRCh37' { + bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/BWAIndex/" + dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz" + dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz.tbi" + dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict" + fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta" + fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai" + intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/intervals/wgs_calling_regions_Sarek.list" + known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz" + known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz.tbi" + 
known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz" + known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz.tbi" + } + 'GATK.GRCh38' { + bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAmem2Index/" + dragmap = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/dragmap/" + cf_chrom_len = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Length/Homo_sapiens_assembly38.len" + dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" + dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" + dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" + fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta" + fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai" + intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/intervals/wgs_calling_regions_noseconds.hg38.bed" + known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz" + known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz.tbi" + known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" + known_indels_tbi = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" + pon = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz" + pon_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi" + } + 'Ensembl.GRCh37' { + bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" - mito_name = "MT" macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" + mito_name = "MT" + readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" + star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" } - 'GRCh38' { - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" + 'NCBI.GRCh38' { + bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" + 
bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" - mito_name = "chrM" macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + mito_name = "chrM" + star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" } 'CHM13' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2TCHM13v2.0/GCF_009914755.1_T2TCHM13v2.0_genomic.gff.gz" gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" - gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" mito_name = "chrM" } 'GRCm38' { - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" + dbsnp = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.snps_all.dbSNP142.vcf.gz" + dbsnp_tbi = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.snps_all.dbSNP142.vcf.gz.tbi" + dict = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.dict" + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa.fai" + gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + intervals = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/intervals/GRCm38_calling_list.bed" + known_indels = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz" + known_indels_tbi = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz.tbi" + macs_gsize = "1.87e9" + mito_name = "MT" + readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" + star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" } 'TAIR10' { fasta = 
"${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" @@ -289,7 +324,6 @@ params { bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" mito_name = "chrM" macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" } 'hg19' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" @@ -302,7 +336,6 @@ params { readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" mito_name = "chrM" macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" } 'mm10' { fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" @@ -315,7 +348,6 @@ params { readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" mito_name = "chrM" macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" } 'bosTau8' { fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" From 980a041d0ade4e5451d03d046237e50311db5afa Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 12:22:04 +0200 Subject: [PATCH 007/234] remove fastqc --- modules/nf-core/fastqc/main.nf | 51 -------------------------------- modules/nf-core/fastqc/meta.yml | 52 --------------------------------- 2 files changed, 103 deletions(-) delete mode 100644 modules/nf-core/fastqc/main.nf delete mode 100644 modules/nf-core/fastqc/meta.yml diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf deleted file mode 100644 index 9ae5838..0000000 --- a/modules/nf-core/fastqc/main.nf +++ /dev/null @@ -1,51 +0,0 @@ -process FASTQC { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::fastqc=0.11.9" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // Make list of old name and new name pairs to use for renaming in the bash while loop - def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } - def rename_to = old_new_pairs*.join(' ').join(' ') - def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') - """ - printf "%s %s\\n" $rename_to | while read old_name new_name; do - [ -f "\${new_name}" ] || ln -s \$old_name \$new_name - done - fastqc $args --threads $task.cpus $renamed_files - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.html - touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml deleted file mode 100644 index 4da5bb5..0000000 --- a/modules/nf-core/fastqc/meta.yml +++ /dev/null @@ -1,52 +0,0 @@ -name: fastqc -description: Run FastQC on sequenced reads -keywords: - - quality control - - qc - - adapters - - fastq -tools: - - fastqc: - description: | - FastQC gives general quality metrics about your reads. - It provides information about the quality score distribution - across your reads, the per base sequence content (%A/C/G/T). 
- You get information about adapter contamination and other - overrepresented sequences. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ - documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ - licence: ["GPL-2.0-only"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" - - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" From 8f90e32baf177563590ad8925db3366839bf25ea Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 12:22:33 +0200 Subject: [PATCH 008/234] update workflow --- workflows/createpanelrefs.nf | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 4ab6d8b..49c553e 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -35,7 +35,6 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/input_check' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -46,7 +45,8 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' // // MODULE: Installed directly from nf-core/modules // -include { FASTQC } from '../modules/nf-core/fastqc/main' + +include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' @@ -63,32 +63,13 @@ workflow CREATEPANELREFS { ch_versions = Channel.empty() - // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files - // - INPUT_CHECK ( - file(params.input) - ) - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") - // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ - // ! 
There is currently no tooling to help you write a sample sheet schema - - // - // MODULE: Run FastQC - // - FASTQC ( - INPUT_CHECK.out.reads - ) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) - // - // MODULE: MultiQC - // + // MULTIQC workflow_summary = WorkflowCreatepanelrefs.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) From 871d63da3e731709dab6834d611ae33674a4ff45 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 12:22:48 +0200 Subject: [PATCH 009/234] update samplesheet schema --- assets/schema_input.json | 60 ++++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 95a560d..2c1a025 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -12,25 +12,67 @@ "pattern": "^\\S+$", "errorMessage": "Sample name must be provided and cannot contain spaces" }, - "fastq_1": { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "bam": { + "errorMessage": "BAM file cannot contain spaces and must have extension '.bam'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.bam$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "format": "file-path", + "exists": true + }, + "bai": { + "errorMessage": "BAM index file cannot contain spaces and must have extension '.bai'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.bai$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "format": "file-path", + "exists": true + }, + "cram": { + "errorMessage": "CRAM file cannot contain spaces and must have extension '.cram'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.cram$" + }, + { + "type": "string", + 
"maxLength": 0 + } + ], + "format": "file-path", + "exists": true }, - "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", + "crai": { + "errorMessage": "CRAM index file cannot contain spaces and must have extension '.crai'", "anyOf": [ { "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" + "pattern": "^\\S+\\.crai$" }, { "type": "string", "maxLength": 0 } - ] + ], + "format": "file-path", + "exists": true } }, - "required": ["sample", "fastq_1"] + "required": ["sample"] } } From 15f4570d8c4ed8bceddd2fc5cbdcbe53c3faccb3 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 12:28:13 +0200 Subject: [PATCH 010/234] sort params --- conf/igenomes.config | 414 +++++++++++++++++++++---------------------- 1 file changed, 207 insertions(+), 207 deletions(-) diff --git a/conf/igenomes.config b/conf/igenomes.config index 6464acf..1121cde 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -19,26 +19,26 @@ params { fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta" fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai" intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/intervals/wgs_calling_regions_Sarek.list" - known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz" - known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz.tbi" known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz" known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz.tbi" + known_snps = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz" + known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz.tbi" } 'GATK.GRCh38' { bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/" bwamem2 = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAmem2Index/" - dragmap = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/dragmap/" cf_chrom_len = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Length/Homo_sapiens_assembly38.len" dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" + dragmap = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/dragmap/" fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta" fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai" intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/intervals/wgs_calling_regions_noseconds.hg38.bed" - known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz" - known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz.tbi" known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" known_indels_tbi = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" + known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz" + known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz.tbi" pon = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz" pon_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi" } @@ -93,380 +93,380 @@ params { star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" } 'TAIR10' { - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" mito_name = "Mt" + readme = 
"${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" + star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" } 'EB2' { - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" + star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" } 'UMD3.1' { - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" + bowtie2 = 
"${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" mito_name = "MT" + readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" + star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" } 'WBcel235' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - mito_name = "MtDNA" macs_gsize = "9e7" + mito_name = "MtDNA" + star = 
"${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" } 'CanFam3.1' { - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" mito_name = "MT" + readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" + star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" } 'GRCz10' { - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" bismark = 
"${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" mito_name = "MT" + star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" } 'BDGP6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - mito_name = "M" macs_gsize = "1.2e8" + mito_name = "M" + star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" } 'EquCab2' { - fasta = 
"${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" mito_name = "MT" + readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" + star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" } 'EB1' { - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" + bowtie2 = 
"${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" } 'Galgal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" mito_name = "MT" + star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" } 'Gm01' { - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" + star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" } 'Mmul_1' { - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" mito_name = "MT" + readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" + star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" } 'IRGSP-1.0' { - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" mito_name = "Mt" + star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" } 'CHIMP2.1.4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" mito_name = "MT" + readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" + star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" } 'Rnor_5.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" gtf = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" mito_name = "MT" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" } 'Rnor_6.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" mito_name = "MT" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" } 'R64-1-1' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" + bed12 = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - mito_name = "MT" macs_gsize = "1.2e7" + mito_name = "MT" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" } 'EF2' { - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" 
- readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" - mito_name = "MT" macs_gsize = "1.21e7" + mito_name = "MT" + readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" + star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" } 'Sbi1' { - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" + star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" } 'Sscrofa10.2' { - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" + bed12 = 
"${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" mito_name = "MT" + readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" + star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" } 'AGPv3' { - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" mito_name = "Mt" + star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" } 'hg38' 
{ - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" - mito_name = "chrM" macs_gsize = "2.7e9" + mito_name = "chrM" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" } 'hg19' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" - mito_name = "chrM" macs_gsize = "2.7e9" + mito_name = "chrM" + readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" } 'mm10' { - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" - mito_name = "chrM" macs_gsize = "1.87e9" + mito_name = "chrM" + readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" + star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" } 'bosTau8' { - fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" mito_name = "chrM" + star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" } 'ce10' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" - mito_name = "chrM" macs_gsize = "9e7" + mito_name = 
"chrM" + readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" + star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" } 'canFam3' { - fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" mito_name = "chrM" + readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" + star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" } 'danRer10' { - fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" bismark = 
"${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" - mito_name = "chrM" macs_gsize = "1.37e9" + mito_name = "chrM" + star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" } 'dm6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" - mito_name = "chrM" macs_gsize = "1.2e8" + mito_name = "chrM" + star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" } 'equCab2' { - fasta = 
"${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" mito_name = "chrM" + readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" + star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" } 'galGal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + bwa = 
"${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" mito_name = "chrM" + readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" + star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" } 'panTro4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" mito_name = "chrM" + readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" + star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" } 'rn6' { - fasta = 
"${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" mito_name = "chrM" + star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" } 'sacCer3' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" - readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" - mito_name = "chrM" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" + fasta = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" macs_gsize = "1.2e7" + mito_name = "chrM" + readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" } 'susScr3' { - fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" + bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" mito_name = "chrM" + readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" + star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" } } } From 80aa57055f9ce7b7ea044df476ba94823737223c Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 12:42:59 +0200 Subject: [PATCH 011/234] better keys --- conf/igenomes.config | 512 +++++++++++++++++++++---------------------- 1 file changed, 256 insertions(+), 256 deletions(-) diff --git a/conf/igenomes.config b/conf/igenomes.config index 1121cde..614ef61 100644 --- a/conf/igenomes.config +++ 
b/conf/igenomes.config @@ -11,99 +11,60 @@ params { // illumina iGenomes reference file paths genomes { - 'GATK.GRCh37' { - bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/BWAIndex/" - dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz" - dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz.tbi" - dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict" - fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta" - fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai" - intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/intervals/wgs_calling_regions_Sarek.list" - known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz" - known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz.tbi" - known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz" - known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz.tbi" + 'Ensembl.AGPv3' { + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + gtf = 
"${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + mito_name = "Mt" + star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" } - 'GATK.GRCh38' { - bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/" - bwamem2 = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAmem2Index/" - cf_chrom_len = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Length/Homo_sapiens_assembly38.len" - dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" - dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" - dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" - dragmap = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/dragmap/" - fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta" - fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai" - intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/intervals/wgs_calling_regions_noseconds.hg38.bed" - known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" - known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" - known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz" - known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz.tbi" - pon = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz" - pon_tbi = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi" + 'Ensembl.BDGP6' { + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" + macs_gsize = "1.2e8" + mito_name = "M" + star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" } - 'Ensembl.GRCh37' { - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - macs_gsize = "2.7e9" + 'Ensembl.CanFam3.1' { + bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + gtf = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" mito_name = "MT" - readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - } - 'NCBI.GRCh38' { - bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - macs_gsize = "2.7e9" - mito_name = "chrM" - star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" - } - 'CHM13' { - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" - bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" - gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2TCHM13v2.0/GCF_009914755.1_T2TCHM13v2.0_genomic.gff.gz" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" - mito_name = "chrM" + readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" + star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" } - 'GRCm38' { - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" - bwa = 
"${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" - dbsnp = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.snps_all.dbSNP142.vcf.gz" - dbsnp_tbi = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.snps_all.dbSNP142.vcf.gz.tbi" - dict = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.dict" - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - fasta_fai = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa.fai" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - intervals = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/intervals/GRCm38_calling_list.bed" - known_indels = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz" - known_indels_tbi = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz.tbi" - macs_gsize = "1.87e9" - mito_name = "MT" - readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + 'Ensembl.CHIMP2.1.4' { + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" + mito_name = "MT" + readme = 
"${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" + star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" } - 'TAIR10' { - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - mito_name = "Mt" - readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" + 'Ensembl.EB1' { + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" + readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" } - 'EB2' { + 'Ensembl.EB2' { bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" bismark = 
"${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" @@ -113,61 +74,19 @@ params { readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" } - 'UMD3.1' { - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - mito_name = "MT" - readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - } - 'WBcel235' { - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - macs_gsize = "9e7" - mito_name = "MtDNA" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - } - 'CanFam3.1' { - bed12 = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - mito_name = "MT" - readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - } - 'GRCz10' { - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + 'Ensembl.EF2' { + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" + macs_gsize = 
"1.21e7" mito_name = "MT" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - } - 'BDGP6' { - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - macs_gsize = "1.2e8" - mito_name = "M" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" + readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" + star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" } - 'EquCab2' { + 'Ensembl.EquCab2' { bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" @@ -178,17 +97,7 @@ params { readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" } - 'EB1' { - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" - 
fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" - } - 'Galgal4' { + 'Ensembl.Galgal4' { bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" @@ -198,7 +107,7 @@ params { mito_name = "MT" star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" } - 'Gm01' { + 'Ensembl.Gm01' { bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" @@ -208,18 +117,48 @@ params { readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" } - 'Mmul_1' { - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" + 'Ensembl.GRCh37' { + bed12 = 
"${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" + macs_gsize = "2.7e9" mito_name = "MT" - readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" + star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" + } + 'Ensembl.GRCm38' { + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" + dbsnp = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.snps_all.dbSNP142.vcf.gz" + dbsnp_tbi = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.snps_all.dbSNP142.vcf.gz.tbi" + dict = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.dict" + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + fasta_fai = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa.fai" + gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + intervals = 
"${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/intervals/GRCm38_calling_list.bed" + known_indels = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz" + known_indels_tbi = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz.tbi" + macs_gsize = "1.87e9" + mito_name = "MT" + readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" + star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + } + 'Ensembl.GRCz10' { + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + mito_name = "MT" + star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" } - 'IRGSP-1.0' { + 'Ensembl.IRGSP-1.0' { bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" @@ -229,18 +168,29 @@ params { mito_name = "Mt" star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" } - 'CHIMP2.1.4' { - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" - bowtie2 = 
"${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" + 'Ensembl.Mmul_1' { + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" mito_name = "MT" - readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" + readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" + star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + } + 'Ensembl.R64-1-1' { + bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + gtf = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" + macs_gsize = "1.2e7" + mito_name = "MT" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" } - 'Rnor_5.0' { + 'Ensembl.Rnor_5.0' { bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" @@ -250,7 +200,7 @@ params { mito_name = "MT" star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" } - 'Rnor_6.0' { + 'Ensembl.Rnor_6.0' { bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" @@ -260,30 +210,7 @@ params { mito_name = "MT" star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" } - 'R64-1-1' { - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - macs_gsize = "1.2e7" - mito_name = "MT" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" - } - 'EF2' { - bed12 = 
"${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - macs_gsize = "1.21e7" - mito_name = "MT" - readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" - } - 'Sbi1' { + 'Ensembl.Sbi1' { bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" @@ -293,7 +220,7 @@ params { readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" } - 'Sscrofa10.2' { + 'Ensembl.Sscrofa10.2' { bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" @@ -304,52 +231,82 @@ params { readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" } - 'AGPv3' { - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" - bismark = 
"${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + 'Ensembl.TAIR10' { + bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" mito_name = "Mt" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" + readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" + star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" } - 'hg38' { - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - macs_gsize = "2.7e9" - mito_name = "chrM" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + 
'Ensembl.UMD3.1' { + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" + mito_name = "MT" + readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" + star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" } - 'hg19' { - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - macs_gsize = "2.7e9" - mito_name = "chrM" - readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" + 'Ensembl.WBcel235' { + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" + fasta = 
"${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" + macs_gsize = "9e7" + mito_name = "MtDNA" + star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" } - 'mm10' { - bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - macs_gsize = "1.87e9" + 'GATK.GRCh37' { + bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/BWAIndex/" + dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz" + dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz.tbi" + dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict" + fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta" + fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai" + intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/intervals/wgs_calling_regions_Sarek.list" + known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz" + known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz.tbi" + 
known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz" + known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz.tbi" + } + 'GATK.GRCh38' { + bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAmem2Index/" + cf_chrom_len = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Length/Homo_sapiens_assembly38.len" + dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" + dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" + dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" + dragmap = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/dragmap/" + fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta" + fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai" + intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/intervals/wgs_calling_regions_noseconds.hg38.bed" + known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" + known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" + known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz" + known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz.tbi" + pon = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz" + pon_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi" + } + 'NCBI.GRCh38' { + bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" + macs_gsize = "2.7e9" mito_name = "chrM" - readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" - star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" } - 'bosTau8' { + 'UCSC.bosTau8' { bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" @@ -359,7 +316,18 @@ params { mito_name = "chrM" star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" } - 'ce10' { + 'UCSC.canFam3' { + bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + gtf = 
"${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" + mito_name = "chrM" + readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" + star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" + } + 'UCSC.ce10' { bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" @@ -371,18 +339,15 @@ params { readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" } - 'canFam3' { - bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" - bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" - fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" + 'UCSC.CHM13' { + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2TCHM13v2.0/GCF_009914755.1_T2TCHM13v2.0_genomic.gff.gz" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" mito_name = "chrM" - readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" - star = 
"${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" } - 'danRer10' { + 'UCSC.danRer10' { bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" @@ -393,7 +358,7 @@ params { mito_name = "chrM" star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" } - 'dm6' { + 'UCSC.dm6' { bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" @@ -404,7 +369,7 @@ params { mito_name = "chrM" star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" } - 'equCab2' { + 'UCSC.equCab2' { bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" @@ -415,7 +380,7 @@ params { readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" } - 'galGal4' { + 'UCSC.galGal4' { bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" @@ -426,7 +391,42 @@ params { readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" } - 'panTro4' { + 'UCSC.hg19' { + bed12 = 
"${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" + macs_gsize = "2.7e9" + mito_name = "chrM" + readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" + } + 'UCSC.hg38' { + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" + macs_gsize = "2.7e9" + mito_name = "chrM" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + } + 'UCSC.mm10' { + bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" + bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" + macs_gsize = "1.87e9" + mito_name = "chrM" + readme = 
"${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" + star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + } + 'UCSC.panTro4' { bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" @@ -437,7 +437,7 @@ params { readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" } - 'rn6' { + 'UCSC.rn6' { bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" @@ -447,7 +447,7 @@ params { mito_name = "chrM" star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" } - 'sacCer3' { + 'UCSC.sacCer3' { bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" @@ -457,7 +457,7 @@ params { readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" } - 'susScr3' { + 'UCSC.susScr3' { bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" From 393c75082f5faba0880db1b2cb1678bd63eedc52 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 15:38:11 
+0200 Subject: [PATCH 012/234] start working on tests --- conf/{modules.config => modules/base.config} | 12 ------------ conf/modules/cnvkit.config | 19 +++++++++++++++++++ conf/test.config | 10 +++++----- nextflow.config | 6 +++--- tests/csv/1.0.0/cram.csv | 3 +++ workflows/createpanelrefs.nf | 1 - 6 files changed, 30 insertions(+), 21 deletions(-) rename conf/{modules.config => modules/base.config} (79%) create mode 100644 conf/modules/cnvkit.config create mode 100644 tests/csv/1.0.0/cram.csv diff --git a/conf/modules.config b/conf/modules/base.config similarity index 79% rename from conf/modules.config rename to conf/modules/base.config index da58a5d..b4a9878 100644 --- a/conf/modules.config +++ b/conf/modules/base.config @@ -18,18 +18,6 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: SAMPLESHEET_CHECK { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: FASTQC { - ext.args = '--quiet' - } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, diff --git a/conf/modules/cnvkit.config b/conf/modules/cnvkit.config new file mode 100644 index 0000000..9955a74 --- /dev/null +++ b/conf/modules/cnvkit.config @@ -0,0 +1,19 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+---------------------------------------------------------------------------------------- +*/ + +process { + + withName: CNVKIT_BATCH { + ext.args = '--method wgs --output-reference panel_of_normals.cnn' + } + +} diff --git a/conf/test.config b/conf/test.config index e307fb2..0aa4720 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,10 +20,10 @@ params { max_time = '6.h' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + input = "${projectDir}/tests/csv/1.0.0/cram.csv" - // Genome references - genome = 'R64-1-1' + // Small reference genome + genome = null + igenomes_ignore = true + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" } diff --git a/nextflow.config b/nextflow.config index 1c2fc1c..1673598 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,7 +16,7 @@ params { genome = null igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false - + // MultiQC options multiqc_config = null @@ -43,7 +43,6 @@ params { custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" config_profile_contact = null config_profile_url = null - // Max resource options // Defaults only, expecting to be overwritten @@ -233,7 +232,8 @@ manifest { } // Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' +includeConfig 'conf/modules/base.config' +includeConfig 'conf/modules/cnvkit.config' // Function to ensure that resource requirements don't go beyond // a maximum limit diff --git a/tests/csv/1.0.0/cram.csv b/tests/csv/1.0.0/cram.csv new file mode 100644 index 0000000..a4531dc --- /dev/null +++ b/tests/csv/1.0.0/cram.csv @@ -0,0 +1,3 @@ 
+sample,cram +sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram +sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 49c553e..e86b5e7 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -80,7 +80,6 @@ workflow CREATEPANELREFS { ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) MULTIQC ( ch_multiqc_files.collect(), From 5688b341f9d477cb8bb516a4a7cc379b22ad0b64 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 16:08:58 +0200 Subject: [PATCH 013/234] add params.tools + use nf-validation --- conf/test.config | 3 +++ nextflow.config | 3 ++- nextflow_schema.json | 39 +++++++++++++++++++++++++++++------- workflows/createpanelrefs.nf | 12 ++++++++--- 4 files changed, 46 insertions(+), 11 deletions(-) diff --git a/conf/test.config b/conf/test.config index 0aa4720..cd90561 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,6 +22,9 @@ params { // Input data input = "${projectDir}/tests/csv/1.0.0/cram.csv" + // Main options + tools = 'cnvkit' + // Small reference genome genome = null igenomes_ignore = true diff --git a/nextflow.config b/nextflow.config index 1673598..8c77ab9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,7 +9,6 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options input = null // References @@ -17,6 
+16,8 @@ params { igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false + // Building Panel of Normals and models + tools = null // No default, must be specified // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 1972341..5f110e5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -13,14 +13,23 @@ "required": ["input", "outdir"], "properties": { "input": { - "type": "string", - "format": "file-path", - "exists": true, - "mimetype": "text/csv", - "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/createpanelrefs/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" + "help_text": "A design file with information about the samples in your experiment. Use this parameter to specify the location of the input files. It has to be a comma-separated file with a header row. 
See [usage docs](https://nf-co.re/createpanelrefs/usage#samplesheet-input).\n\nIf no input file is specified, the pipeline will attempt to locate one in the `{outdir}` directory.", + "fa_icon": "fas fa-file-csv", + "schema": "assets/schema_input.json", + "anyOf": [ + { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$" + }, + { + "type": "string", + "maxLength": 0 + } + ] }, "outdir": { "type": "string", @@ -42,6 +51,22 @@ } } }, + "main_options": { + "title": "Main options", + "type": "object", + "description": "Most common options used for the pipeline", + "default": "", + "properties": { + "tools": { + "type": "string", + "fa_icon": "fas fa-toolbox", + "description": "Tools to use for building Panel of Normals or models.", + "help_text": "Multiple tools separated with commas.\n\nTools available: CNVKIT.", + "pattern": "^((cnvkit)?,?)*(? Date: Sat, 8 Jul 2023 16:49:10 +0200 Subject: [PATCH 014/234] generate PON with CNVKIT --- conf/modules/cnvkit.config | 9 ++++++++- tests/csv/1.0.0/cram.csv | 6 +++--- workflows/createpanelrefs.nf | 30 ++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/conf/modules/cnvkit.config b/conf/modules/cnvkit.config index 9955a74..0e29261 100644 --- a/conf/modules/cnvkit.config +++ b/conf/modules/cnvkit.config @@ -13,7 +13,14 @@ process { withName: CNVKIT_BATCH { - ext.args = '--method wgs --output-reference panel_of_normals.cnn' + ext.args = {"--method wgs --output-reference ${meta.id}.cnn"} + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/cnvkit/" }, + pattern: "*{.cnn}" + ] + ] } } diff --git a/tests/csv/1.0.0/cram.csv b/tests/csv/1.0.0/cram.csv index a4531dc..e132a68 100644 --- a/tests/csv/1.0.0/cram.csv +++ b/tests/csv/1.0.0/cram.csv @@ -1,3 +1,3 @@ -sample,cram 
-sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram -sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram +sample,bam +sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam +sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index b5e20f3..11b7c6e 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -15,6 +15,20 @@ log.info logo + paramsSummaryLog(workflow) + citation WorkflowCreatepanelrefs.initialise(params, log) +// Check input path parameters to see if they exist + +def checkPathParamList = [ + params.fasta +] + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CHECK MANDATORY PARAMETERS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +for (param in checkPathParamList) if (param) file(param, checkIfExists: true) + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MANAGE SAMPLESHEET @@ -23,6 +37,17 @@ WorkflowCreatepanelrefs.initialise(params, log) ch_from_samplesheet = Channel.fromSamplesheet("input") +ch_input = ch_from_samplesheet.map{meta, bam, bai, cram, crai -> + if (bam) return [ [id:"panel", data_type:"bam" ], bam ] + if (cram) return [ [id:"panel", data_type:"cram" ], cram ] +}.groupTuple().branch{ + bam: it[0].data_type == "bam" + cram: it[0].data_type == "cram" +} + +// Initialize file channels based on params, defined in the params.genomes[params.genome] scope +ch_fasta = params.fasta ? 
Channel.fromPath(params.fasta).first() : Channel.empty() + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES @@ -71,6 +96,11 @@ workflow CREATEPANELREFS { ch_versions = Channel.empty() + if (params.tools && params.tools.split(',').contains('cnvkit')) { + CNVKIT_BATCH ( ch_input.bam.map{meta, bam -> [ meta, [], bam ]}, ch_fasta, [], [], [], true ) + ch_versions = ch_versions.mix(CNVKIT_BATCH.out.versions) + } + CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) From 512b18411720b3a615bf167051fa8eae37154b75 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 18:50:14 +0200 Subject: [PATCH 015/234] add tests --- .gitattributes | 1 + .github/workflows/ci.yml | 185 +++++++++++++++++++++++++--- .gitignore | 9 +- README.md | 21 +++- nf-test.config | 16 +++ tests/configs/tags.yml | 14 +++ tests/lib/UTILS.groovy | 11 ++ tests/pipeline/default.nf.test | 25 ++++ tests/pipeline/default.nf.test.snap | 18 +++ 9 files changed, 273 insertions(+), 27 deletions(-) create mode 100644 nf-test.config create mode 100644 tests/configs/tags.yml create mode 100644 tests/lib/UTILS.groovy create mode 100644 tests/pipeline/default.nf.test create mode 100644 tests/pipeline/default.nf.test.snap diff --git a/.gitattributes b/.gitattributes index 7a2dabc..31ba574 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,3 +2,4 @@ *.nf.test linguist-language=nextflow modules/nf-core/** linguist-generated subworkflows/nf-core/** linguist-generated +tests/**/*nf.test.snap linguist-generated diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4ca7478..cdefe64 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,43 +1,194 @@ -name: nf-core CI # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +name: nf-core CI on: push: branches: - dev pull_request: + branches: + - dev + - master release: - 
types: [published] + types: + - "published" env: NXF_ANSI_LOG: false + NFTEST_VER: "0.7.3" +# Cancel if a newer run is started concurrency: - group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true jobs: - test: - name: Run pipeline with test data - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/createpanelrefs') }}" + changes: + name: Check for changes + runs-on: ubuntu-latest + outputs: + # Expose matched filters as job 'tags' output variable + tags: ${{ steps.filter.outputs.changes }} + steps: + - uses: actions/checkout@v3 + - uses: dorny/paths-filter@v2 + id: filter + with: + filters: "tests/config/tags.yml" + + define_nxf_versions: + name: Choose nextflow versions to test against depending on target branch + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.nxf_versions.outputs.matrix }} + steps: + - id: nxf_versions + run: | + if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.base_ref }}" == "dev" && "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then + echo matrix='["latest-everything"]' | tee -a $GITHUB_OUTPUT + else + echo matrix='["latest-everything", "23.04.0"]' | tee -a $GITHUB_OUTPUT + fi + + test-dev: + if: (github.base_ref == 'dev' || (github.event_name == 'push')) + name: ${{ matrix.tags }} ${{ matrix.profile }} NXF ${{ matrix.NXF_VER }} runs-on: ubuntu-latest + needs: changes strategy: + fail-fast: false matrix: - NXF_VER: - - "23.04.0" - - "latest-everything" + # Run tests based on changes in code + tags: ["${{ fromJson(needs.changes.outputs.tags) }}"] + # Only run docker tests on dev branch + profile: ["docker"] + # Only test minimal version + NXF_VER: ["23.04.0"] + # Always run default test + include: + - tags: default + steps: - name: Check out pipeline code 
uses: actions/checkout@v3 - - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + - uses: actions/cache@v3 + with: + path: /usr/local/bin/nextflow + key: ${{ runner.os }} + restore-keys: | + ${{ runner.os }}-nextflow- + + - name: Install nf-test + run: | + wget -qO- https://code.askimed.com/install/nf-test | bash -s $NFTEST_VER + sudo mv nf-test /usr/local/bin/ + + - name: Install Nextflow ${{ matrix.NXF_VER }} + uses: nf-core/setup-nextflow@v1.2.0 with: version: "${{ matrix.NXF_VER }}" - - name: Run pipeline with test data - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix + - name: Run nf-test + uses: Wandalen/wretry.action@v1.0.11 + with: + command: nf-test test tests/${{ matrix.tags }}.nf.test --profile "test,${{ matrix.profile }}" --tap=test.tap + attempt_limit: 3 + + - name: Output log on failure + if: failure() run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + sudo apt install bat > /dev/null + batcat --decorations=always --color=always /home/runner/pytest_workflow_*/*/log.{out,err} + + - name: Upload logs on failure + if: failure() + uses: actions/upload-artifact@v2 + with: + name: logs-${{ matrix.profile }} + path: | + /home/runner/pytest_workflow_*/*/.nextflow.log + /home/runner/pytest_workflow_*/*/log.out + /home/runner/pytest_workflow_*/*/log.err + /home/runner/pytest_workflow_*/*/work + + test-master: + # Only run on master branch or when a PR is opened against master + if: github.base_ref == 'master' + name: ${{ matrix.tags }} ${{ matrix.profile }} NXF ${{ matrix.NXF_VER }} + runs-on: ubuntu-latest + needs: [changes, define_nxf_versions] + strategy: + fail-fast: false + matrix: + # Run tests based on changes in code + tags: ["${{ fromJson(needs.changes.outputs.tags) }}"] + # Run docker, singularity and conda tests on master branch + profile: ["docker", "singularity", "conda"] + 
NXF_VER: ${{ fromJson(needs.define_nxf_versions.outputs.matrix) }} + # Always run default test + include: + - tags: default + + steps: + - name: Check out pipeline code + uses: actions/checkout@v3 + + - uses: actions/cache@v3 + with: + path: /usr/local/bin/nextflow + key: ${{ runner.os }} + restore-keys: | + ${{ runner.os }}-nextflow- + + - name: Install nf-test + run: | + wget -qO- https://code.askimed.com/install/nf-test | bash -s $NFTEST_VER + sudo mv nf-test /usr/local/bin/ + + - name: Install Nextflow ${{ matrix.NXF_VER }} + uses: nf-core/setup-nextflow@v1.2.0 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Set up Singularity + if: matrix.profile == 'singularity' + uses: eWaterCycle/setup-singularity@v5 + with: + singularity-version: 3.7.1 + + - name: Set up miniconda + if: matrix.profile == 'conda' + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + channels: conda-forge,bioconda,defaults + python-version: ${{ matrix.python-version }} + + - name: Conda clean + if: matrix.profile == 'conda' + run: conda clean -a + + - name: Run nf-test + uses: Wandalen/wretry.action@v1.0.11 + with: + command: nf-test test tests/${{ matrix.tag }}.nf.test --profile "test,${{ matrix.profile }}" --tap=test.tap + attempt_limit: 3 + + - name: Output log on failure + if: failure() + run: | + sudo apt install bat > /dev/null + batcat --decorations=always --color=always /home/runner/pytest_workflow_*/*/log.{out,err} + + - name: Upload logs on failure + if: failure() + uses: actions/upload-artifact@v2 + with: + name: logs-${{ matrix.profile }} + path: | + /home/runner/pytest_workflow_*/*/.nextflow.log + /home/runner/pytest_workflow_*/*/log.out + /home/runner/pytest_workflow_*/*/log.err + /home/runner/pytest_workflow_*/*/work + !/home/runner/pytest_workflow_*/*/work/conda + !/home/runner/pytest_workflow_*/*/work/singularity diff --git a/.gitignore b/.gitignore index 5124c9a..8cf857e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,9 @@ +*.pyc 
+.DS_Store .nextflow* -work/ +.nf-test/ data/ results/ -.DS_Store -testing/ testing* -*.pyc +testing/ +work/ diff --git a/README.md b/README.md index 70f8911..45e13c5 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # ![nf-core/createpanelrefs](docs/images/nf-core-createpanelrefs_logo_light.png#gh-light-mode-only) ![nf-core/createpanelrefs](docs/images/nf-core-createpanelrefs_logo_dark.png#gh-dark-mode-only) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results) +[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) @@ -8,7 +9,10 @@ [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/createpanelrefs) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on 
Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs) +[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core) +[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core) +[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction @@ -30,11 +34,14 @@ First, prepare a samplesheet with your input data that looks as follows: `samplesheet.csv`: ```csv -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz +sample,bam +sample1,sample1.bam +sample2,sample2.bam +sample3,sample3.bam +sample4,sample4.bam ``` -Each row represents a fastq file (single-end) or a pair of fastq files (paired end). +Each row represents a bam file. Now, you can run the pipeline using: @@ -42,6 +49,7 @@ Now, you can run the pipeline using: nextflow run nf-core/createpanelrefs \ -profile \ --input samplesheet.csv \ + --genome GATK.GRCh38 \ --outdir ``` @@ -67,7 +75,8 @@ nf-core/createpanelrefs was originally written by @maxulysse. 
We thank the following people for their extensive assistance in the development of this pipeline: - +- @jfy133 +- @JoseEspinosa ## Contributions and Support diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000..c35074c --- /dev/null +++ b/nf-test.config @@ -0,0 +1,16 @@ +config { + // location for all nf-tests + testsDir "tests/pipeline" + + // nf-test directory including temporary files for each test + workDir ".nf-test" + + // location of library folder that is added automatically to the classpath + libDir "tests/lib/" + + // location of an optional nextflow.config file specific for executing tests + configFile "nextflow.config" + + // run all test with defined profile(s) from the main nextflow.config + profile "" +} diff --git a/tests/configs/tags.yml b/tests/configs/tags.yml new file mode 100644 index 0000000..7e9962d --- /dev/null +++ b/tests/configs/tags.yml @@ -0,0 +1,14 @@ +default: + - conf/** + - main.nf + - modules/** + - nextflow.config + - nextflow_schema.json + - subworkflows/** + - tests/*.nf.test + - workflows/** + +cnvkit: + - conf/modules/cnvkit.config + - modules/nf-core/cnvkit/batch/main.nf + - tests/cnvkit.nf.test diff --git a/tests/lib/UTILS.groovy b/tests/lib/UTILS.groovy new file mode 100644 index 0000000..311403c --- /dev/null +++ b/tests/lib/UTILS.groovy @@ -0,0 +1,11 @@ +// Function to remove Nextflow version from software_versions.yml + +class UTILS { + public static String removeNextflowVersion(outputDir) { + def softwareVersions = path("$outputDir/pipeline_info/software_versions.yml").yaml + if (softwareVersions.containsKey("Workflow")) { + softwareVersions.Workflow.remove("Nextflow") + } + return softwareVersions + } +} diff --git a/tests/pipeline/default.nf.test b/tests/pipeline/default.nf.test new file mode 100644 index 0000000..4d24bf1 --- /dev/null +++ b/tests/pipeline/default.nf.test @@ -0,0 +1,25 @@ +nextflow_pipeline { + + name "Test pipeline" + script "main.nf" + tag "default" + + test("Run default 
test") { + + when { + params { + outdir = "$outputDir" + validationSchemaIgnoreParams = 'genomes,baseDir,base-dir,outputDir,output-dir' + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/multiqc/").exists() }, + { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, + { assert snapshot(path("$outputDir/reference/cnvkit/").list()).match("cnvkit") } + ) + } + } +} diff --git a/tests/pipeline/default.nf.test.snap b/tests/pipeline/default.nf.test.snap new file mode 100644 index 0000000..d41eebb --- /dev/null +++ b/tests/pipeline/default.nf.test.snap @@ -0,0 +1,18 @@ +{ + "software_versions": { + "content": [ + "{CNVKIT_BATCH={cnvkit=0.9.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, Workflow={nf-core/createpanelrefs=1.0dev}}" + ], + "timestamp": "2023-07-08T16:47:57+0000" + }, + "cnvkit": { + "content": [ + "panel.cnn:md5,07dea67088da689ad04012552c606882", + "test.paired_end.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test.paired_end.sorted.targetcoverage.cnn:md5,ff526714696aa49bdc1dc8d00d965266", + "test2.paired_end.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test2.paired_end.sorted.targetcoverage.cnn:md5,6ae6b3fce7299eedca6133d911c38fe1" + ], + "timestamp": "2023-07-08T16:47:57+0000" + } +} \ No newline at end of file From 538704eb31b7503b1b9936f5bacc86b34494b5c8 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 18:52:47 +0200 Subject: [PATCH 016/234] more tests --- tests/pipeline/cnvkit.nf.test | 26 ++++++++++++++++++++++++++ tests/pipeline/cnvkit.nf.test.snap | 18 ++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 tests/pipeline/cnvkit.nf.test create mode 100644 tests/pipeline/cnvkit.nf.test.snap diff --git a/tests/pipeline/cnvkit.nf.test b/tests/pipeline/cnvkit.nf.test new file mode 100644 index 0000000..dd255c3 --- /dev/null +++ b/tests/pipeline/cnvkit.nf.test @@ -0,0 +1,26 @@ 
+nextflow_pipeline { + + name "Test pipeline" + script "main.nf" + tag "cnvkit" + + test("Run cnvkit test") { + + when { + params { + outdir = "$outputDir" + tools = 'cnvkit' + validationSchemaIgnoreParams = 'genomes,baseDir,base-dir,outputDir,output-dir' + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/multiqc/").exists() }, + { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, + { assert snapshot(path("$outputDir/reference/cnvkit/").list()).match("cnvkit") } + ) + } + } +} diff --git a/tests/pipeline/cnvkit.nf.test.snap b/tests/pipeline/cnvkit.nf.test.snap new file mode 100644 index 0000000..f52ff4b --- /dev/null +++ b/tests/pipeline/cnvkit.nf.test.snap @@ -0,0 +1,18 @@ +{ + "software_versions": { + "content": [ + "{CNVKIT_BATCH={cnvkit=0.9.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, Workflow={nf-core/createpanelrefs=1.0dev}}" + ], + "timestamp": "2023-07-08T16:51:32+0000" + }, + "cnvkit": { + "content": [ + "panel.cnn:md5,07dea67088da689ad04012552c606882", + "test.paired_end.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test.paired_end.sorted.targetcoverage.cnn:md5,ff526714696aa49bdc1dc8d00d965266", + "test2.paired_end.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test2.paired_end.sorted.targetcoverage.cnn:md5,6ae6b3fce7299eedca6133d911c38fe1" + ], + "timestamp": "2023-07-08T16:51:32+0000" + } +} \ No newline at end of file From d48a1c2dac256bd563e30d18aedcec1174077beb Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 18:57:03 +0200 Subject: [PATCH 017/234] update CHANGELOG --- CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f1f6bc8..abde31e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,9 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n ### `Added` -- `CNVKIT` can be used to create a PON 
-- Usage of nf-validation +- [#5](https://github.com/nf-core/createpanelrefs/pull/5) - `CNVKIT` can be used to create a PON +- [#5](https://github.com/nf-core/createpanelrefs/pull/5) - Usage of nf-validation +- [#5](https://github.com/nf-core/createpanelrefs/pull/5) - Usage of nf-test ### `Fixed` From 77b10b83ba06d1e548be50e237b4e9e3b7d496d6 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 19:07:49 +0200 Subject: [PATCH 018/234] fix linting --- .nf-core.yml | 7 +++++++ .prettierignore | 17 +++++++++-------- modules.json | 9 ++------- .../nf-core/custom/dumpsoftwareversions/main.nf | 2 +- .../templates/dumpsoftwareversions.py | 3 +-- modules/nf-core/multiqc/main.nf | 2 +- nextflow_schema.json | 3 +++ 7 files changed, 24 insertions(+), 19 deletions(-) diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc8..9ba16db 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,8 @@ repository_type: pipeline +lint: + actions_ci: False + files_exist: + - conf/modules.config + files_unchanged: + - .gitattributes + - .gitignore diff --git a/.prettierignore b/.prettierignore index 437d763..79934dc 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,12 +1,13 @@ -email_template.html -adaptivecard.json -slackreport.json +*.pyc +.DS_Store .nextflow* -work/ +.nf-test/ +adaptivecard.json +bin/ data/ +email_template.html results/ -.DS_Store -testing/ +slackreport.json testing* -*.pyc -bin/ +testing/ +work/ diff --git a/modules.json b/modules.json index 89ff874..29e931c 100644 --- a/modules.json +++ b/modules.json @@ -12,17 +12,12 @@ }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "76cc4938c1f6ea5c7d83fed1eeffc146787f9543", - "installed_by": ["modules"] - }, - "fastqc": { - "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "f2d63bd5b68925f98f572eed70993d205cc694b7", + "git_sha": 
"911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] } } diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 800a609..ebc8727 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -5,7 +5,7 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index e55b8d4..da03340 100755 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -4,11 +4,10 @@ """Provide functions to merge multiple versions.yml files.""" +import yaml import platform from textwrap import dedent -import yaml - def _make_versions_html(versions): """Generate a tabular HTML output of all versions for MultiQC.""" diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 4b60474..1fc387b 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -4,7 +4,7 @@ process MULTIQC { conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/nextflow_schema.json b/nextflow_schema.json index 5f110e5..ae43650 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -312,6 +312,9 @@ { "$ref": "#/definitions/input_output_options" }, + { + "$ref": "#/definitions/main_options" + }, { "$ref": "#/definitions/reference_genome_options" }, From ebeda5545614cee8a9f6ecb1c0a5b344935307b1 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 19:12:03 +0200 Subject: [PATCH 019/234] fix path --- tests/{configs => config}/tags.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{configs => config}/tags.yml (100%) diff --git a/tests/configs/tags.yml b/tests/config/tags.yml similarity index 100% rename from tests/configs/tags.yml rename to tests/config/tags.yml From abf40f5ea91aea07fded5a107ffad608fb1143c3 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Sat, 8 Jul 2023 19:15:06 +0200 Subject: [PATCH 020/234] fix path to tests --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cdefe64..b4fad51 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -91,7 +91,7 @@ jobs: - name: Run nf-test uses: Wandalen/wretry.action@v1.0.11 with: - command: nf-test test tests/${{ matrix.tags }}.nf.test --profile "test,${{ matrix.profile }}" --tap=test.tap + command: nf-test test tests/pipeline/${{ matrix.tags }}.nf.test --profile "test,${{ matrix.profile }}" --tap=test.tap attempt_limit: 3 - name: Output log on failure @@ -171,7 +171,7 @@ jobs: - name: Run nf-test uses: Wandalen/wretry.action@v1.0.11 with: - command: nf-test test tests/${{ matrix.tag }}.nf.test --profile "test,${{ matrix.profile }}" --tap=test.tap + command: nf-test test tests/pipeline/${{ 
matrix.tag }}.nf.test --profile "test,${{ matrix.profile }}" --tap=test.tap attempt_limit: 3 - name: Output log on failure From ce819e31203cf871b2f20213763cac94c360048e Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 26 Jul 2023 12:25:24 +0200 Subject: [PATCH 021/234] adding pon with mutect2 --- conf/igenomes.config | 1 - modules.json | 24 ++++ .../gatk4/createsomaticpanelofnormals/main.nf | 47 ++++++++ .../createsomaticpanelofnormals/meta.yml | 70 ++++++++++++ .../nf-core/gatk4/genomicsdbimport/main.nf | 103 +++++++++++++++++ .../nf-core/gatk4/genomicsdbimport/meta.yml | 84 ++++++++++++++ modules/nf-core/gatk4/mutect2/main.nf | 74 ++++++++++++ modules/nf-core/gatk4/mutect2/meta.yml | 105 ++++++++++++++++++ .../nf-core/bam_create_som_pon_gatk/main.nf | 69 ++++++++++++ .../nf-core/bam_create_som_pon_gatk/meta.yml | 67 +++++++++++ workflows/createpanelrefs.nf | 17 ++- 11 files changed, 657 insertions(+), 4 deletions(-) create mode 100644 modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf create mode 100644 modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml create mode 100644 modules/nf-core/gatk4/genomicsdbimport/main.nf create mode 100644 modules/nf-core/gatk4/genomicsdbimport/meta.yml create mode 100644 modules/nf-core/gatk4/mutect2/main.nf create mode 100644 modules/nf-core/gatk4/mutect2/meta.yml create mode 100644 subworkflows/nf-core/bam_create_som_pon_gatk/main.nf create mode 100644 subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml diff --git a/conf/igenomes.config b/conf/igenomes.config index 614ef61..73e84d9 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -280,7 +280,6 @@ params { 'GATK.GRCh38' { bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/" bwamem2 = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAmem2Index/" - cf_chrom_len = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Length/Homo_sapiens_assembly38.len" dbsnp = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" diff --git a/modules.json b/modules.json index 29e931c..7f606c6 100644 --- a/modules.json +++ b/modules.json @@ -15,12 +15,36 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "gatk4/createsomaticpanelofnormals": { + "branch": "master", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "installed_by": ["bam_create_som_pon_gatk"] + }, + "gatk4/genomicsdbimport": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["bam_create_som_pon_gatk"] + }, + "gatk4/mutect2": { + "branch": "master", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "installed_by": ["bam_create_som_pon_gatk"] + }, "multiqc": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] } } + }, + "subworkflows": { + "nf-core": { + "bam_create_som_pon_gatk": { + "branch": "master", + "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "installed_by": ["subworkflows"] + } + } } } } diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf b/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf new file mode 100644 index 0000000..e5557c7 --- /dev/null +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf @@ -0,0 +1,47 @@ +process GATK4_CREATESOMATICPANELOFNORMALS { + tag "$meta.id" + label 'process_low' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(genomicsdb) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK CreateSomaticPanelOfNormals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" CreateSomaticPanelOfNormals \\ + --variant gendb://$genomicsdb \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml b/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml new file mode 100644 index 0000000..2f49cf2 --- /dev/null +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml @@ -0,0 +1,70 @@ +name: gatk4_createsomaticpanelofnormals +description: Create a panel of normals containing germline and artifactual sites for use with mutect2. +keywords: + - gatk4 + - createsomaticpanelofnormals + - panelofnormals +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size.
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - genomicsdb: + type: directory + description: genomicsDB workspace that contains the samples to create the somatic panel of normals with. + pattern: "*_genomicsDBworkspace" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test'] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test'] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test'] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + +output: + - vcf: + type: file + description: panel of normal as compressed vcf file + pattern: "*.vcf.gz" + - tbi: + type: file + description: Tabix index of vcf file + pattern: "*vcf.gz.tbi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/genomicsdbimport/main.nf b/modules/nf-core/gatk4/genomicsdbimport/main.nf new file mode 100644 index 0000000..dc77345 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/main.nf @@ -0,0 +1,103 @@ +process GATK4_GENOMICSDBIMPORT { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(vcf), path(tbi), path(interval_file), val(interval_value), path(wspace) + val run_intlist + val run_updatewspace + val input_map + + output: + tuple val(meta), path("$prefix") , optional:true, emit: genomicsdb + tuple val(meta), path("$updated_db") , optional:true, emit: updatedb + tuple val(meta), path("*.interval_list"), optional:true, emit: intervallist + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + // settings for running default create gendb mode + input_command = input_map ? "--sample-name-map ${vcf[0]}" : vcf.collect(){"--variant $it"}.join(' ') + + genomicsdb_command = "--genomicsdb-workspace-path ${prefix}" + interval_command = interval_file ? "--intervals ${interval_file}" : "--intervals ${interval_value}" + updated_db = "" + + // settings changed for running get intervals list mode if run_intlist is true + if (run_intlist) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = "--output-interval-list-to-file ${prefix}.interval_list" + } + + // settings changed for running update gendb mode. input_command same as default, update_db forces module to emit the updated gendb + if (run_updatewspace) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = '' + updated_db = "${wspace}" + } + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK GenomicsDBImport] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" GenomicsDBImport \\ + $input_command \\ + $genomicsdb_command \\ + $interval_command \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + + genomicsdb_command = "--genomicsdb-workspace-path ${prefix}" + interval_command = interval_file ? "--intervals ${interval_file}" : "--intervals ${interval_value}" + updated_db = "" + + // settings changed for running get intervals list mode if run_intlist is true + if (run_intlist) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = "--output-interval-list-to-file ${prefix}.interval_list" + } + + // settings changed for running update gendb mode. input_command same as default, update_db forces module to emit the updated gendb + if (run_updatewspace) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = '' + updated_db = "${wspace}" + } + + def stub_genomicsdb = genomicsdb_command == "--genomicsdb-workspace-path ${prefix}" ? "touch ${prefix}" : "" + def stub_interval = interval_command == "--output-interval-list-to-file ${prefix}.interval_list" ? "touch ${prefix}.interval_list" : "" + def stub_update = updated_db != "" ? "touch ${wspace}" : "" + + """ + ${stub_genomicsdb} + ${stub_interval} + ${stub_update} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/genomicsdbimport/meta.yml b/modules/nf-core/gatk4/genomicsdbimport/meta.yml new file mode 100644 index 0000000..af626cb --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/meta.yml @@ -0,0 +1,84 @@ +name: gatk4_genomicsdbimport +description: merge GVCFs from multiple samples. For use in joint genotyping or somatic panel of normal creation. 
+keywords: + - gatk4 + - genomicsdbimport + - genomicsdb + - panelofnormalscreation + - jointgenotyping +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: list + description: either a list of vcf files to be used to create or update a genomicsdb, or a file that contains a map to vcf files to be used. + pattern: "*.vcf.gz" + + - tbi: + type: list + description: list of tbi files that match with the input vcf files + pattern: "*.vcf.gz_tbi" + + - wspace: + type: path + description: path to an existing genomicsdb to be used in update db mode or get intervals mode. This WILL NOT specify name of a new genomicsdb in create db mode. + pattern: "/path/to/existing/gendb" + + - intervalfile: + type: file + description: file containing the intervals to be used when creating the genomicsdb + pattern: "*.interval_list" + + - intervalval: + type: string + description: if an intervals file has not been specified, the value entered here will be used as an interval via the "-L" argument + pattern: "example: chr1:1000-10000" + + - run_intlist: + type: boolean + description: Specify whether to run get interval list mode, this option cannot be specified at the same time as run_updatewspace. + pattern: "true/false" + + - run_updatewspace: + type: boolean + description: Specify whether to run update genomicsdb mode, this option takes priority over run_intlist.
+ pattern: "true/false" + + - input_map: + type: boolean + description: Specify whether the vcf input is providing a list of vcf file(s) or a single file containing a map of paths to vcf files to be used to create or update a genomicsdb. + pattern: "*.sample_map" + +output: + - genomicsdb: + type: directory + description: Directory containing the files that compose the genomicsdb workspace, this is only output for create mode, as update changes an existing db + pattern: "*/$prefix" + - updatedb: + type: directory + description: Directory containing the files that compose the updated genomicsdb workspace, this is only output for update mode, and should be the same path as the input wspace. + pattern: "same/path/as/wspace" + - intervallist: + type: file + description: File containing the intervals used to generate the genomicsdb, only created by get intervals mode. + pattern: "*.interval_list" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/mutect2/main.nf b/modules/nf-core/gatk4/mutect2/main.nf new file mode 100644 index 0000000..bddc368 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/main.nf @@ -0,0 +1,74 @@ +process GATK4_MUTECT2 { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(input), path(input_index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + path(germline_resource) + path(germline_resource_tbi) + path(panel_of_normals) + path(panel_of_normals_tbi) + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + tuple val(meta), path("*.stats") , emit: stats + tuple val(meta), path("*.f1r2.tar.gz"), optional:true, emit: f1r2 + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def inputs = input.collect{ "--input $it"}.join(" ") + def interval_command = intervals ? "--intervals $intervals" : "" + def pon_command = panel_of_normals ? "--panel-of-normals $panel_of_normals" : "" + def gr_command = germline_resource ? "--germline-resource $germline_resource" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" Mutect2 \\ + $inputs \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + $pon_command \\ + $gr_command \\ + $interval_command \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + touch ${prefix}.vcf.gz.stats + touch ${prefix}.f1r2.tar.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/mutect2/meta.yml b/modules/nf-core/gatk4/mutect2/meta.yml new file mode 100644 index 0000000..4842c22 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/meta.yml @@ -0,0 +1,105 @@ +name: gatk4_mutect2 +description: Call somatic SNVs and indels via local assembly of haplotypes. +keywords: + - gatk4 + - mutect2 + - haplotype + - somatic +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - input: + type: list + description: list of BAM files, also able to take CRAM as an input + pattern: "*.{bam/cram}" + - input_index: + type: list + description: list of BAM file indexes, also able to take CRAM indexes as an input + pattern: "*.{bam.bai/cram.crai}" + - intervals: + type: file + description: Specify region the tools is run on. + pattern: ".{bed,interval_list}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - germline_resource: + type: file + description: Population vcf of germline sequencing, containing allele fractions. + pattern: "*.vcf.gz" + - germline_resource_tbi: + type: file + description: Index file for the germline resource. + pattern: "*.vcf.gz.tbi" + - panel_of_normals: + type: file + description: vcf file to be used as a panel of normals. + pattern: "*.vcf.gz" + - panel_of_normals_tbi: + type: file + description: Index for the panel of normals. + pattern: "*.vcf.gz.tbi" + +output: + - vcf: + type: file + description: compressed vcf file + pattern: "*.vcf.gz" + - tbi: + type: file + description: Index of vcf file + pattern: "*vcf.gz.tbi" + - stats: + type: file + description: Stats file that pairs with output vcf file + pattern: "*vcf.gz.stats" + - f1r2: + type: file + description: file containing information to be passed to LearnReadOrientationModel (only outputted when tumor_normal_pair mode is run) + pattern: "*.f1r2.tar.gz" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@GCJMackenzie" + - "@ramprasadn" diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf b/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf new file mode 100644 index 0000000..a4b2c2c --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf @@ -0,0 +1,69 @@ +// +// Run GATK mutect2, genomicsdbimport and createsomaticpanelofnormals +// + +include { GATK4_MUTECT2 } from 
'../../../modules/nf-core/gatk4/mutect2/main' +include { GATK4_GENOMICSDBIMPORT } from '../../../modules/nf-core/gatk4/genomicsdbimport/main' +include { GATK4_CREATESOMATICPANELOFNORMALS } from '../../../modules/nf-core/gatk4/createsomaticpanelofnormals/main' + +workflow BAM_CREATE_SOM_PON_GATK { + take: + ch_mutect2_in // channel: [ val(meta), path(input), path(input_index), path(interval_file) ] + ch_fasta // channel: [ val(meta), path(fasta) ] + ch_fai // channel: [ val(meta), path(fai) ] + ch_dict // channel: [ val(meta), path(dict) ] + val_pon_norm // string: name for panel of normals + ch_gendb_intervals // channel: [ path(interval_file) ] + + main: + ch_versions = Channel.empty() + ch_input = ch_mutect2_in + + // + // Perform variant calling for each sample using mutect2 module in panel of normals mode. + // + GATK4_MUTECT2 ( + ch_input, + ch_fasta, + ch_fai, + ch_dict, + [], + [], + [], + [] + ) + ch_versions = ch_versions.mix(GATK4_MUTECT2.out.versions.first()) + + // + // Convert all sample vcfs into a genomicsdb workspace using genomicsdbimport. + // + ch_vcf = GATK4_MUTECT2.out.vcf.collect{it[1]}.toList() + ch_index = GATK4_MUTECT2.out.tbi.collect{it[1]}.toList() + ch_dict_gendb = ch_dict.map{meta, dict -> return dict}.toList() + + ch_gendb_input = Channel.of([id:val_pon_norm]) + .combine(ch_vcf) + .combine(ch_index) + .combine(ch_gendb_intervals) + .combine(ch_dict_gendb) + .map{meta, vcf, tbi, interval, dict -> [meta, vcf, tbi, interval, [], dict]} + + GATK4_GENOMICSDBIMPORT ( ch_gendb_input, false, false, false ) + ch_versions = ch_versions.mix(GATK4_GENOMICSDBIMPORT.out.versions.first()) + + // + //Panel of normals made from genomicsdb workspace using createsomaticpanelofnormals. 
+ // + GATK4_CREATESOMATICPANELOFNORMALS ( GATK4_GENOMICSDBIMPORT.out.genomicsdb, ch_fasta, ch_fai, ch_dict ) + ch_versions = ch_versions.mix(GATK4_CREATESOMATICPANELOFNORMALS.out.versions.first()) + + emit: + mutect2_vcf = GATK4_MUTECT2.out.vcf // channel: [ val(meta), path(vcf) ] + mutect2_index = GATK4_MUTECT2.out.tbi // channel: [ val(meta), path(tbi) ] + mutect2_stats = GATK4_MUTECT2.out.stats // channel: [ val(meta), path(stats) ] + genomicsdb = GATK4_GENOMICSDBIMPORT.out.genomicsdb // channel: [ val(meta), path(genomicsdb) ] + pon_vcf = GATK4_CREATESOMATICPANELOFNORMALS.out.vcf // channel: [ val(meta), path(vcf) ] + pon_index = GATK4_CREATESOMATICPANELOFNORMALS.out.tbi // channel: [ val(meta), path(tbi) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml b/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml new file mode 100644 index 0000000..e682f7e --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml @@ -0,0 +1,67 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_create_som_pon_gatk +description: Perform variant calling on a set of normal samples using mutect2 panel of normals mode. Group them into a genomicsdbworkspace using genomicsdbimport, then use this to create a panel of normals using createsomaticpanelofnormals. 
+keywords: + - gatk4 + - mutect2 + - genomicsdbimport + - createsomaticpanelofnormals + - variant_calling + - genomicsdb_workspace + - panel_of_normals +modules: + - gatk4/mutect2 + - gatk4/genomicsdbimport + - gatk4/createsomaticpanelofnormals +input: + - ch_mutect2_in: + type: list + description: | + An input channel containing the following files: + - input: One or more BAM/CRAM files + - input_index: The index/indices from the BAM/CRAM file(s) + - interval_file: An interval file to be used with the mutect call + Structure: [ meta, input, input_index, interval_file ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - mutect2_vcf: + type: list + description: List of compressed vcf files to be used to make the gendb workspace + pattern: "[ *.vcf.gz ]" + - mutect2_index: + type: list + description: List of indexes of mutect2_vcf files + pattern: "[ *vcf.gz.tbi ]" + - mutect2_stats: + type: list + description: List of stats files that pair with mutect2_vcf files + pattern: "[ *vcf.gz.stats ]" + - genomicsdb: + type: directory + description: Directory containing the files that compose the genomicsdb workspace. 
+ pattern: "path/name_of_workspace" + - pon_vcf: + type: file + description: Panel of normal as compressed vcf file + pattern: "*.vcf.gz" + - pon_index: + type: file + description: Index of pon_vcf file + pattern: "*vcf.gz.tbi" +authors: + - "@GCJMackenzie" diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 11b7c6e..2e1dc58 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -18,7 +18,10 @@ WorkflowCreatepanelrefs.initialise(params, log) // Check input path parameters to see if they exist def checkPathParamList = [ - params.fasta + params.dict, + params.fasta, + params.fasta_fai, + params.input ] /* @@ -46,7 +49,9 @@ ch_input = ch_from_samplesheet.map{meta, bam, bai, cram, crai -> } // Initialize file channels based on params, defined in the params.genomes[params.genome] scope -ch_fasta = params.fasta ? Channel.fromPath(params.fasta).first() : Channel.empty() +ch_dict = params.dict ? Channel.fromPath(params.dict).first() : Channel.empty() +ch_fasta = params.fasta ? Channel.fromPath(params.fasta).first() : Channel.empty() +ch_fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).first() : Channel.empty() /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -82,6 +87,7 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { BAM_CREATE_SOM_PON_GATK } from '../subworkflows/nf-core/bam_create_som_pon_gatk/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -97,10 +103,15 @@ workflow CREATEPANELREFS { ch_versions = Channel.empty() if (params.tools && params.tools.split(',').contains('cnvkit')) { - CNVKIT_BATCH ( ch_input.bam.map{meta, bam -> [ meta, [], bam ]}, ch_fasta, [], [], [], true ) + CNVKIT_BATCH ( ch_input.bam.map{ meta, bam -> [ meta, [], bam ]}, ch_fasta, [], [], [], true ) ch_versions = ch_versions.mix(CNVKIT_BATCH.out.versions) } + if (params.tools && params.tools.split(',').contains('mutect2')) { + BAM_CREATE_SOM_PON_GATK ( ch_input.cram.map{ meta, cram -> [ meta, bam ]}, ch_fasta, ch_fasta_fai, ch_dict, params.pon_name, [] ) + ch_versions = ch_versions.mix(BAM_CREATE_SOM_PON_GATK.out.versions) + } + CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) From cf522bfa59290a158bbe51b486b466ed3b70ea38 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 5 Sep 2023 17:21:34 +0200 Subject: [PATCH 022/234] add preprocessintervals and collectreadcounts --- assets/schema_input.json | 1 + conf/modules/germlinecnvcaller.config | 27 ++++++ modules.json | 25 ++++++ .../nf-core/gatk4/collectreadcounts/main.nf | 68 +++++++++++++++ .../nf-core/gatk4/collectreadcounts/meta.yml | 87 +++++++++++++++++++ .../nf-core/gatk4/preprocessintervals/main.nf | 61 +++++++++++++ .../gatk4/preprocessintervals/meta.yml | 84 ++++++++++++++++++ .../picard/createsequencedictionary/main.nf | 42 +++++++++ .../picard/createsequencedictionary/meta.yml | 45 ++++++++++ modules/nf-core/samtools/faidx/main.nf | 50 +++++++++++ 
modules/nf-core/samtools/faidx/meta.yml | 57 ++++++++++++ modules/nf-core/samtools/index/main.nf | 48 ++++++++++ modules/nf-core/samtools/index/meta.yml | 53 +++++++++++ nextflow.config | 1 + nextflow_schema.json | 4 +- .../local/germlinecnvcaller_cohort.nf | 33 +++++++ workflows/createpanelrefs.nf | 31 +++++-- 17 files changed, 708 insertions(+), 9 deletions(-) create mode 100644 conf/modules/germlinecnvcaller.config create mode 100644 modules/nf-core/gatk4/collectreadcounts/main.nf create mode 100644 modules/nf-core/gatk4/collectreadcounts/meta.yml create mode 100644 modules/nf-core/gatk4/preprocessintervals/main.nf create mode 100644 modules/nf-core/gatk4/preprocessintervals/meta.yml create mode 100644 modules/nf-core/picard/createsequencedictionary/main.nf create mode 100644 modules/nf-core/picard/createsequencedictionary/meta.yml create mode 100644 modules/nf-core/samtools/faidx/main.nf create mode 100644 modules/nf-core/samtools/faidx/meta.yml create mode 100644 modules/nf-core/samtools/index/main.nf create mode 100644 modules/nf-core/samtools/index/meta.yml create mode 100644 subworkflows/local/germlinecnvcaller_cohort.nf diff --git a/assets/schema_input.json b/assets/schema_input.json index 2c1a025..97ec0d7 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -9,6 +9,7 @@ "properties": { "sample": { "type": "string", + "meta": ["id"], "pattern": "^\\S+$", "errorMessage": "Sample name must be provided and cannot contain spaces" }, diff --git a/conf/modules/germlinecnvcaller.config b/conf/modules/germlinecnvcaller.config new file mode 100644 index 0000000..5bf4498 --- /dev/null +++ b/conf/modules/germlinecnvcaller.config @@ -0,0 +1,27 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + 
ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + + withName: GATK4_PREPROCESSINTERVALS { + ext.args = {"--imr OVERLAPPING_ONLY"} + } + + withName: GATK4_COLLECTREADCOUNTS { + ext.args = {"--format TSV --imr OVERLAPPING_ONLY"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/germlinecnvcaller/readcounts" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/modules.json b/modules.json index 29e931c..309b4a6 100644 --- a/modules.json +++ b/modules.json @@ -15,10 +15,35 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "gatk4/collectreadcounts": { + "branch": "master", + "git_sha": "d25bf48327e86a7f737047a57ec264b90e22ce3d", + "installed_by": ["modules"] + }, + "gatk4/preprocessintervals": { + "branch": "master", + "git_sha": "1226419498a14d17f98d12d6488d333b0dbd0418", + "installed_by": ["modules"] + }, "multiqc": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] + }, + "picard/createsequencedictionary": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "samtools/faidx": { + "branch": "master", + "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", + "installed_by": ["modules"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] } } } diff --git a/modules/nf-core/gatk4/collectreadcounts/main.nf b/modules/nf-core/gatk4/collectreadcounts/main.nf new file mode 100644 index 0000000..ce1985b --- 
/dev/null +++ b/modules/nf-core/gatk4/collectreadcounts/main.nf @@ -0,0 +1,68 @@ +process GATK4_COLLECTREADCOUNTS { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(input), path(input_index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + + output: + tuple val(meta), path("*.hdf5"), optional: true, emit: hdf5 + tuple val(meta), path("*.tsv") , optional: true, emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def reference = fasta ? "--reference $fasta" : "" + def extension = args.contains("--format HDF5") ? "hdf5" : + args.contains("--format TSV") ? "tsv" : + "hdf5" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK COLLECTREADCOUNTS] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" CollectReadCounts \\ + --input $input \\ + --intervals $intervals \\ + --output ${prefix}.$extension \\ + $reference \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--format HDF5") ? "hdf5" : + args.contains("--format TSV") ? 
"tsv" : + "hdf5" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/collectreadcounts/meta.yml b/modules/nf-core/gatk4/collectreadcounts/meta.yml new file mode 100644 index 0000000..938011c --- /dev/null +++ b/modules/nf-core/gatk4/collectreadcounts/meta.yml @@ -0,0 +1,87 @@ +name: "gatk4_collectreadcounts" +description: Collects read counts at specified intervals. The count for each interval is calculated by counting the number of read starts that lie in the interval. +keywords: + - bam + - cram + - CollectReadCounts + - gatk + - gatk4 +tools: + - gatk4: + description: + Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037593911-CombineGVCFs + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - input_index: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - intervals: + type: file + description: A file containing the specified intervals + pattern: "*.{bed,intervals}" + - fasta: + type: file + description: Optional - Reference FASTA + pattern: "*.{fasta,fa}" + - fai: + type: file + description: Optional - Index of the reference FASTA file + pattern: "*.fai" + - dict: + type: file + description: Optional - Sequence dictionary of the reference FASTA file + pattern: "*.dict" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - hdf5: + type: file + description: The read counts in hdf5 format + pattern: "*.hdf5" + - tsv: + type: file + description: The read counts in TSV format + pattern: "*.tsv" + +authors: + - "@nvnieuwk" diff --git a/modules/nf-core/gatk4/preprocessintervals/main.nf b/modules/nf-core/gatk4/preprocessintervals/main.nf new file mode 100644 index 0000000..aff482f --- /dev/null +++ b/modules/nf-core/gatk4/preprocessintervals/main.nf @@ -0,0 +1,61 @@ +process GATK4_PREPROCESSINTERVALS { + tag "$fasta" + label 'process_medium' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(fai) + tuple val(meta3), path(dict) + tuple val(meta4), path(intervals) + tuple val(meta5), path(exclude_intervals) + + output: + tuple val(meta), path("*.interval_list"), emit: interval_list + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def include_command = intervals ? "--intervals $intervals" : "" + def exclude_command = exclude_intervals ? "--exclude-intervals $exclude_intervals" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK PreprocessIntervals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + """ + gatk --java-options "-Xmx${avail_mem}M" PreprocessIntervals \\ + $include_command \\ + $exclude_command \\ + --reference $fasta \\ + --output ${prefix}.interval_list \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.interval_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/preprocessintervals/meta.yml b/modules/nf-core/gatk4/preprocessintervals/meta.yml new file mode 100644 index 0000000..8b6ae9b --- /dev/null +++ b/modules/nf-core/gatk4/preprocessintervals/meta.yml @@ -0,0 +1,84 @@ +name: "gatk4_preprocessintervals" +description: Prepares bins for coverage collection. 
+keywords: + - gatk4 + - preprocessintervals + - interval + - bed +tools: + - "gatk4": + description: + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - intervals: + type: file + description: Interval file (bed or interval_list) with the genomic regions to be included from the analysis (optional) + pattern: "*.{bed,interval_list}" + - meta5: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - exclude_intervals: + type: file + description: Interval file (bed or interval_list) with the genomic regions to be excluded from the analysis (optional) + pattern: "*.{bed,interval_list}" + +output: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - interval_list: + type: file + description: Processed interval list file + pattern: "*.{bed,interval_list}" + +authors: + - "@ryanjameskennedy" + - "@ViktorHy" + - "@ramprasadn" diff --git a/modules/nf-core/picard/createsequencedictionary/main.nf b/modules/nf-core/picard/createsequencedictionary/main.nf new file mode 100644 index 0000000..d07cc67 --- /dev/null +++ b/modules/nf-core/picard/createsequencedictionary/main.nf @@ -0,0 +1,42 @@ +process PICARD_CREATESEQUENCEDICTIONARY { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::picard=3.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : + 'biocontainers/picard:3.0.0--hdfd78af_1' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.dict"), emit: reference_dict + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard CreateSequenceDictionary] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + picard \\ + -Xmx${avail_mem}M \\ + CreateSequenceDictionary \\ + $args \\ + --REFERENCE $fasta \\ + --OUTPUT ${prefix}.dict + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard CreateSequenceDictionary --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/nf-core/picard/createsequencedictionary/meta.yml b/modules/nf-core/picard/createsequencedictionary/meta.yml new file mode 100644 index 0000000..3e04159 --- /dev/null +++ b/modules/nf-core/picard/createsequencedictionary/meta.yml @@ -0,0 +1,45 @@ +name: picard_createsequencedictionary +description: Creates a sequence dictionary for a reference sequence. +keywords: + - sequence + - dictionary + - picard +tools: + - picard: + description: | + Creates a sequence dictionary file (with ".dict" extension) from a reference sequence provided in FASTA format, which is required by many processing and analysis tools. The output file contains a header but no SAMRecords, and the header contains only sequence records. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360036712531-CreateSequenceDictionary-Picard- + tool_dev_url: https://github.com/broadinstitute/picard + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - dict: + type: file + description: picard dictionary file + pattern: "*.{dict}" + +authors: + - "@sateeshperi" + - "@mjcipriano" + - "@hseabolt" diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf new file mode 100644 index 0000000..59ed308 --- /dev/null +++ b/modules/nf-core/samtools/faidx/main.nf @@ -0,0 +1,50 @@ +process SAMTOOLS_FAIDX { + tag "$fasta" + label 'process_single' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(fai) + + output: + tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + faidx \\ + $fasta \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? 
"touch ${match[0][1]}" : '' + """ + ${fastacmd} + touch ${fasta}.fai + + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml new file mode 100644 index 0000000..957b25e --- /dev/null +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -0,0 +1,57 @@ +name: samtools_faidx +description: Index FASTA file +keywords: + - index + - fasta + - faidx +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 0000000..0b20aa4 --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${input}.bai + touch ${input}.crai + touch ${input}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 0000000..8bd2fa6 --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,53 @@ +name: 
samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + type: file + description: CSI index file + pattern: "*.{csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/nextflow.config b/nextflow.config index 8c77ab9..0eba204 100644 --- a/nextflow.config +++ b/nextflow.config @@ -235,6 +235,7 @@ manifest { // Load modules.config for DSL2 module specific options includeConfig 'conf/modules/base.config' includeConfig 'conf/modules/cnvkit.config' +includeConfig 'conf/modules/germlinecnvcaller.config' // Function to ensure that resource requirements don't go beyond // a maximum limit diff --git a/nextflow_schema.json b/nextflow_schema.json index ae43650..99050ca 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -61,8 +61,8 @@ "type": "string", "fa_icon": "fas fa-toolbox", "description": "Tools to use for building Panel of Normals or models.", - 
"help_text": "Multiple tools separated with commas.\n\nTools available: CNVKIT.", - "pattern": "^((cnvkit)?,?)*(? it[1]}) + .set {ch_readcounts_in} + + GATK4_COLLECTREADCOUNTS(ch_readcounts_in, ch_fasta, ch_fai, ch_dict) + + ch_versions = ch_versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) + + emit: + versions = ch_versions +} diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 11b7c6e..72ce6d8 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -38,15 +38,12 @@ for (param in checkPathParamList) if (param) file(param, checkIfExists: true) ch_from_samplesheet = Channel.fromSamplesheet("input") ch_input = ch_from_samplesheet.map{meta, bam, bai, cram, crai -> - if (bam) return [ [id:"panel", data_type:"bam" ], bam ] - if (cram) return [ [id:"panel", data_type:"cram" ], cram ] -}.groupTuple().branch{ - bam: it[0].data_type == "bam" - cram: it[0].data_type == "cram" + if (bam) return [ [data_type:"bam"] + meta, bam ] + if (cram) return [ [data_type:"cram"] + meta, cram ] } // Initialize file channels based on params, defined in the params.genomes[params.genome] scope -ch_fasta = params.fasta ? 
Channel.fromPath(params.fasta).first() : Channel.empty() +ch_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it.baseName], it] }.collect() /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -83,6 +80,11 @@ include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch/ma include { MULTIQC } from '../modules/nf-core/multiqc/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +// +// SUBWORKFLOW imports +// + +include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvcaller_cohort' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -97,10 +99,25 @@ workflow CREATEPANELREFS { ch_versions = Channel.empty() if (params.tools && params.tools.split(',').contains('cnvkit')) { - CNVKIT_BATCH ( ch_input.bam.map{meta, bam -> [ meta, [], bam ]}, ch_fasta, [], [], [], true ) + ch_input + .map{ meta, bam -> + new_meta = meta + [id:"panel"] + [new_meta, bam] + }.groupTuple().branch{ + bam: it[0].data_type == "bam" + cram: it[0].data_type == "cram" + }.bam.set { ch_cnvkit_input } + + CNVKIT_BATCH ( ch_cnvkit_input.map{meta, bam -> [ meta, [], bam ]}, ch_fasta.map{meta, fasta -> fasta}, [], [], [], true ) ch_versions = ch_versions.mix(CNVKIT_BATCH.out.versions) } + if (params.tools && params.tools.split(',').contains('germlinecnvcaller')) { + + GERMLINECNVCALLER_COHORT(ch_input, ch_fasta) + ch_versions = ch_versions.mix(GERMLINECNVCALLER_COHORT.out.versions) + } + CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) From 2f7b7d0ec2433bb6c40491ece129f935fd2b40dc Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 5 Sep 2023 19:51:37 +0200 Subject: [PATCH 023/234] add annotateintervals module --- modules.json | 5 ++ .../nf-core/gatk4/annotateintervals/main.nf | 68 ++++++++++++++++ 
.../nf-core/gatk4/annotateintervals/meta.yml | 77 +++++++++++++++++++ 3 files changed, 150 insertions(+) create mode 100644 modules/nf-core/gatk4/annotateintervals/main.nf create mode 100644 modules/nf-core/gatk4/annotateintervals/meta.yml diff --git a/modules.json b/modules.json index 309b4a6..8446825 100644 --- a/modules.json +++ b/modules.json @@ -15,6 +15,11 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "gatk4/annotateintervals": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, "gatk4/collectreadcounts": { "branch": "master", "git_sha": "d25bf48327e86a7f737047a57ec264b90e22ce3d", diff --git a/modules/nf-core/gatk4/annotateintervals/main.nf b/modules/nf-core/gatk4/annotateintervals/main.nf new file mode 100644 index 0000000..394a6aa --- /dev/null +++ b/modules/nf-core/gatk4/annotateintervals/main.nf @@ -0,0 +1,68 @@ +process GATK4_ANNOTATEINTERVALS { + tag "$meta.id" + label 'process_single' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(intervals) + path(fasta) + path(fasta_fai) + path(dict) + path(mappable_regions) + path(mappable_regions_tbi) + path(segmental_duplication_regions) + path(segmental_duplication_regions_tbi) + + output: + tuple val(meta), path("*.tsv"), emit: annotated_intervals + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def inputs = intervals.collect(){ "--intervals ${it}" }.join(" ") + def mappability_track = mappable_regions ? 
"--mappability-track ${mappable_regions}" : "" + def segmental_duplication_tracks = segmental_duplication_regions ? "--segmental-duplication-track ${segmental_duplication_regions}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK AnnotateIntervals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + """ + gatk --java-options "-Xmx${avail_mem}M" AnnotateIntervals \\ + ${inputs} \\ + --reference ${fasta} \\ + --output ${prefix}.tsv \\ + ${mappability_track} \\ + ${segmental_duplication_tracks} \\ + --tmp-dir . \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/annotateintervals/meta.yml b/modules/nf-core/gatk4/annotateintervals/meta.yml new file mode 100644 index 0000000..6f16c95 --- /dev/null +++ b/modules/nf-core/gatk4/annotateintervals/meta.yml @@ -0,0 +1,77 @@ +name: "gatk4_annotateintervals" +description: Annotates intervals with GC content, mappability, and segmental-duplication content +keywords: + - gatk + - annotateintervals + - intervals + - bed + - annotation +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals: + type: file(s) + description: One or more interval files to annotate + pattern: "*.{interval_list,list,bed}" + - fasta: + type: file + description: The reference FASTA file + pattern: "*.{fasta,fa}" + - fasta_fai: + type: file + description: The index of the reference FASTA file + pattern: "*.fai" + - dict: + type: file + description: The sequence dictionary reference FASTA file + pattern: "*.dict" + - mappable_regions: + type: file + description: | + Optional - Umap single-read mappability track + The track should correspond to the appropriate read length and overlapping intervals must be merged + pattern: "*.bed(.gz)?" + - mappable_regions_tbi: + type: file + description: Optional - The index of the gzipped umap single-read mappability track + pattern: "*.bed.gz.tbi" + - segmental_duplication_regions: + type: file + description: Optional - Segmental-duplication track + pattern: "*.bed(.gz)?" + - segmental_duplication_regions_tbi: + type: file + description: Optional - The index of the gzipped segmental-duplication track + pattern: "*.bed.gz.tbi" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - annotated_intervals: + type: file + description: The output TSV file with a SAM-style header containing the annotated intervals + pattern: "*.tsv" + +authors: + - "@nvnieuwk" From c53040bb343046eecec35817fd66a1c36378c595 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 5 Sep 2023 19:55:27 +0200 Subject: [PATCH 024/234] add filterintervals module --- modules.json | 5 ++ modules/nf-core/gatk4/filterintervals/main.nf | 58 +++++++++++++++++++ .../nf-core/gatk4/filterintervals/meta.yml | 53 +++++++++++++++++ 3 files changed, 116 insertions(+) create mode 100644 modules/nf-core/gatk4/filterintervals/main.nf create mode 100644 modules/nf-core/gatk4/filterintervals/meta.yml diff --git a/modules.json b/modules.json index 8446825..c1fdb56 100644 --- a/modules.json +++ b/modules.json @@ -25,6 +25,11 @@ "git_sha": "d25bf48327e86a7f737047a57ec264b90e22ce3d", "installed_by": ["modules"] }, + "gatk4/filterintervals": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, "gatk4/preprocessintervals": { "branch": "master", "git_sha": "1226419498a14d17f98d12d6488d333b0dbd0418", diff --git a/modules/nf-core/gatk4/filterintervals/main.nf b/modules/nf-core/gatk4/filterintervals/main.nf new file mode 100644 index 0000000..2ce7702 --- /dev/null +++ b/modules/nf-core/gatk4/filterintervals/main.nf @@ -0,0 +1,58 @@ +process GATK4_FILTERINTERVALS { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(intervals) + path read_counts + path annotated_intervals + + output: + tuple val(meta), path("*.interval_list"), emit: interval_list + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def annotated_command = annotated_intervals ? "--annotated-intervals $annotated_intervals" : "" + def read_counts_command = read_counts ? "--input $read_counts" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK FilterIntervals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" FilterIntervals \\ + $annotated_command \\ + $read_counts_command \\ + --intervals $intervals \\ + --output ${prefix}.interval_list \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.interval_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/filterintervals/meta.yml b/modules/nf-core/gatk4/filterintervals/meta.yml new file mode 100644 index 0000000..efd66f4 --- /dev/null +++ b/modules/nf-core/gatk4/filterintervals/meta.yml @@ -0,0 +1,53 @@ +name: "gatk4_filterintervals" +description: Filters intervals based on annotations and/or count statistics. 
+keywords: + - gatk4 + - gatk4_filterintervals +tools: + - "gatk4": + description: + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - read_counts: + type: file + description: Read counts input file + pattern: "*.{tsv, hdf5}" + - intervals: + type: file + description: Processed interval list file (processed_intervals.interval_list) + pattern: "*.interval_list" + - annotated_intervals: + type: file + description: Annotated intervals TSV file (annotated_intervals.tsv). + pattern: "*.tsv" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - interval_list: + type: file + description: Filtered interval list file + pattern: "*.interval_list" + +authors: + - "@ryanjameskennedy" + - "@ViktorHy" From 44cffd6feae13394af093ec478c7d81ebac977d2 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 6 Sep 2023 10:21:22 +0200 Subject: [PATCH 025/234] update cnvkit --- modules.json | 2 +- modules/nf-core/cnvkit/batch/main.nf | 14 ++++++------- modules/nf-core/cnvkit/batch/meta.yml | 30 ++++++++++++++++++++++++--- 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/modules.json b/modules.json index 29e931c..4b01925 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "cnvkit/batch": { "branch": "master", - "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "git_sha": "016397249f05f5af7b97e3ea8d64458a07df2928", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { diff --git a/modules/nf-core/cnvkit/batch/main.nf b/modules/nf-core/cnvkit/batch/main.nf index 1e4d81e..795053a 100644 --- a/modules/nf-core/cnvkit/batch/main.nf +++ b/modules/nf-core/cnvkit/batch/main.nf @@ -2,17 +2,17 @@ process CNVKIT_BATCH { tag "$meta.id" label 'process_low' - conda "bioconda::cnvkit=0.9.9 bioconda::samtools=1.16.1" + conda "bioconda::cnvkit=0.9.10 bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:3bdd798e4b9aed6d3e1aaa1596c913a3eeb865cb-0' : - 'biocontainers/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:3bdd798e4b9aed6d3e1aaa1596c913a3eeb865cb-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:c94363856059151a2974dc501fb07a0360cc60a3-0' : + 'biocontainers/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:c94363856059151a2974dc501fb07a0360cc60a3-0' }" input: tuple val(meta), path(tumor), path(normal) - path fasta - path fasta_fai - path targets - path reference + tuple val(meta2), path(fasta) + tuple val(meta3), path(fasta_fai) + tuple val(meta4), path(targets) + tuple val(meta5), path(reference) val panel_of_normals output: diff --git a/modules/nf-core/cnvkit/batch/meta.yml b/modules/nf-core/cnvkit/batch/meta.yml index 3fc00f1..1543304 100644 --- a/modules/nf-core/cnvkit/batch/meta.yml +++ b/modules/nf-core/cnvkit/batch/meta.yml @@ -26,22 +26,43 @@ input: type: file description: | Input normal sample bam file (or cram) + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - fasta: type: file description: | Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided) + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - fasta_fai: type: file description: | Input reference genome fasta index (optional, but recommended for cram_input) + - meta4: + type: map + description: | + Groovy Map containing information about target file + e.g. [ id:'test' ] - targetfile: type: file description: | Input target bed file + - meta5: + type: map + description: | + Groovy Map containing information about reference file + e.g. 
[ id:'test' ] - reference: type: file description: | Input reference cnn-file (only for germline and tumor-only running) + output: - meta: type: map @@ -76,11 +97,14 @@ output: type: file description: File containing software versions pattern: "versions.yml" + authors: - - "@kaurravneet4123" - - "@KevinMenden" - - "@MaxUlysse" + - "@adamrtalbot" - "@drpatelh" - "@fbdtemme" + - "@kaurravneet4123" + - "@KevinMenden" - "@lassefolkersen" + - "@MaxUlysse" + - "@priesgo" - "@SusiJo" From 7afbbec7f2a75d1d7deb208339cf439221e30a77 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 6 Sep 2023 10:41:36 +0200 Subject: [PATCH 026/234] refactor cnvkit logic --- assets/schema_input.json | 1 + workflows/createpanelrefs.nf | 26 ++++++++++++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 2c1a025..c5fd52a 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,6 +10,7 @@ "sample": { "type": "string", "pattern": "^\\S+$", + "meta": ["id"], "errorMessage": "Sample name must be provided and cannot contain spaces" }, "bam": { diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 11b7c6e..455f11a 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -38,15 +38,11 @@ for (param in checkPathParamList) if (param) file(param, checkIfExists: true) ch_from_samplesheet = Channel.fromSamplesheet("input") ch_input = ch_from_samplesheet.map{meta, bam, bai, cram, crai -> - if (bam) return [ [id:"panel", data_type:"bam" ], bam ] - if (cram) return [ [id:"panel", data_type:"cram" ], cram ] -}.groupTuple().branch{ - bam: it[0].data_type == "bam" - cram: it[0].data_type == "cram" + if (bam) return [ meta + [data_type:"bam"], bam ] + if (cram) return [ meta + [data_type:"cram" ], cram ] } - // Initialize file channels based on params, defined in the params.genomes[params.genome] scope 
-ch_fasta = params.fasta ? Channel.fromPath(params.fasta).first() : Channel.empty() +ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() : Channel.empty() /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -97,7 +93,21 @@ workflow CREATEPANELREFS { ch_versions = Channel.empty() if (params.tools && params.tools.split(',').contains('cnvkit')) { - CNVKIT_BATCH ( ch_input.bam.map{meta, bam -> [ meta, [], bam ]}, ch_fasta, [], [], [], true ) + + ch_input + .map{ meta, bam -> + new_meta = meta + [id:"panel"] + [new_meta, bam] + } + .groupTuple() + .branch{ + bam: it[0].data_type == "bam" + } + .bam + .map {meta, bam -> [ meta, [], bam ]} + .set { ch_cnvkit_input } + + CNVKIT_BATCH ( ch_cnvkit_input, ch_fasta, [[:],[]], [[:],[]], [[:],[]], true ) ch_versions = ch_versions.mix(CNVKIT_BATCH.out.versions) } From 8e0c1bb12157257a6804135b8d037403f8bd1728 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 6 Sep 2023 10:49:24 +0200 Subject: [PATCH 027/234] update test --- tests/pipeline/cnvkit.nf.test.snap | 4 ++-- tests/pipeline/default.nf.test.snap | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/pipeline/cnvkit.nf.test.snap b/tests/pipeline/cnvkit.nf.test.snap index f52ff4b..7807a75 100644 --- a/tests/pipeline/cnvkit.nf.test.snap +++ b/tests/pipeline/cnvkit.nf.test.snap @@ -1,7 +1,7 @@ { "software_versions": { "content": [ - "{CNVKIT_BATCH={cnvkit=0.9.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, Workflow={nf-core/createpanelrefs=1.0dev}}" + "{CNVKIT_BATCH={cnvkit=0.9.10}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, Workflow={nf-core/createpanelrefs=1.0dev}}" ], "timestamp": "2023-07-08T16:51:32+0000" }, @@ -15,4 +15,4 @@ ], "timestamp": "2023-07-08T16:51:32+0000" } -} \ No newline at end of file +} diff --git a/tests/pipeline/default.nf.test.snap 
b/tests/pipeline/default.nf.test.snap index d41eebb..807ac4d 100644 --- a/tests/pipeline/default.nf.test.snap +++ b/tests/pipeline/default.nf.test.snap @@ -1,7 +1,7 @@ { "software_versions": { "content": [ - "{CNVKIT_BATCH={cnvkit=0.9.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, Workflow={nf-core/createpanelrefs=1.0dev}}" + "{CNVKIT_BATCH={cnvkit=0.9.10}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, Workflow={nf-core/createpanelrefs=1.0dev}}" ], "timestamp": "2023-07-08T16:47:57+0000" }, @@ -15,4 +15,4 @@ ], "timestamp": "2023-07-08T16:47:57+0000" } -} \ No newline at end of file +} From 389c98f7945fcb39fbef6f44a5fcf4d62e9bf7f6 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 6 Sep 2023 11:08:18 +0200 Subject: [PATCH 028/234] update modules --- modules.json | 4 +- .../nf-core/gatk4/annotateintervals/main.nf | 14 +++---- .../nf-core/gatk4/annotateintervals/meta.yml | 37 ++++++++++++++++++- modules/nf-core/gatk4/filterintervals/main.nf | 10 ++--- .../nf-core/gatk4/filterintervals/meta.yml | 21 ++++++++--- 5 files changed, 66 insertions(+), 20 deletions(-) diff --git a/modules.json b/modules.json index c1fdb56..b9bec7d 100644 --- a/modules.json +++ b/modules.json @@ -17,7 +17,7 @@ }, "gatk4/annotateintervals": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "016397249f05f5af7b97e3ea8d64458a07df2928", "installed_by": ["modules"] }, "gatk4/collectreadcounts": { @@ -27,7 +27,7 @@ }, "gatk4/filterintervals": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "016397249f05f5af7b97e3ea8d64458a07df2928", "installed_by": ["modules"] }, "gatk4/preprocessintervals": { diff --git a/modules/nf-core/gatk4/annotateintervals/main.nf b/modules/nf-core/gatk4/annotateintervals/main.nf index 394a6aa..8c5f59b 100644 --- a/modules/nf-core/gatk4/annotateintervals/main.nf +++ b/modules/nf-core/gatk4/annotateintervals/main.nf 
@@ -9,13 +9,13 @@ process GATK4_ANNOTATEINTERVALS { input: tuple val(meta), path(intervals) - path(fasta) - path(fasta_fai) - path(dict) - path(mappable_regions) - path(mappable_regions_tbi) - path(segmental_duplication_regions) - path(segmental_duplication_regions_tbi) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fasta_fai) + tuple val(meta4), path(dict) + tuple val(meta5), path(mappable_regions) + tuple val(meta6), path(mappable_regions_tbi) + tuple val(meta7), path(segmental_duplication_regions) + tuple val(meta8), path(segmental_duplication_regions_tbi) output: tuple val(meta), path("*.tsv"), emit: annotated_intervals diff --git a/modules/nf-core/gatk4/annotateintervals/meta.yml b/modules/nf-core/gatk4/annotateintervals/meta.yml index 6f16c95..6f33e87 100644 --- a/modules/nf-core/gatk4/annotateintervals/meta.yml +++ b/modules/nf-core/gatk4/annotateintervals/meta.yml @@ -24,35 +24,70 @@ input: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - intervals: - type: file(s) + type: file description: One or more interval files to annotate pattern: "*.{interval_list,list,bed}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - fasta: type: file description: The reference FASTA file pattern: "*.{fasta,fa}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - fasta_fai: type: file description: The index of the reference FASTA file pattern: "*.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - dict: type: file description: The sequence dictionary reference FASTA file pattern: "*.dict" + - meta5: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] - mappable_regions: type: file description: | Optional - Umap single-read mappability track The track should correspond to the appropriate read length and overlapping intervals must be merged pattern: "*.bed(.gz)?" + - meta6: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - mappable_regions_tbi: type: file description: Optional - The index of the gzipped umap single-read mappability track pattern: "*.bed.gz.tbi" + - meta7: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - segmental_duplication_regions: type: file description: Optional - Segmental-duplication track pattern: "*.bed(.gz)?" + - meta8: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - segmental_duplication_regions_tbi: type: file description: Optional - The index of the gzipped segmental-duplication track diff --git a/modules/nf-core/gatk4/filterintervals/main.nf b/modules/nf-core/gatk4/filterintervals/main.nf index 2ce7702..5032fa3 100644 --- a/modules/nf-core/gatk4/filterintervals/main.nf +++ b/modules/nf-core/gatk4/filterintervals/main.nf @@ -9,8 +9,8 @@ process GATK4_FILTERINTERVALS { input: tuple val(meta), path(intervals) - path read_counts - path annotated_intervals + tuple val(meta2), path(read_counts) + tuple val(meta3), path(annotated_intervals) output: tuple val(meta), path("*.interval_list"), emit: interval_list @@ -20,10 +20,10 @@ process GATK4_FILTERINTERVALS { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def annotated_command = annotated_intervals ? "--annotated-intervals $annotated_intervals" : "" - def read_counts_command = read_counts ? "--input $read_counts" : "" + def annotated_command = annotated_intervals ? "--annotated-intervals $annotated_intervals" : "" + def read_counts_command = read_counts ? 
read_counts.collect{"--input $it"}.join(" ") : "" def avail_mem = 3072 if (!task.memory) { diff --git a/modules/nf-core/gatk4/filterintervals/meta.yml b/modules/nf-core/gatk4/filterintervals/meta.yml index efd66f4..b253d78 100644 --- a/modules/nf-core/gatk4/filterintervals/meta.yml +++ b/modules/nf-core/gatk4/filterintervals/meta.yml @@ -2,7 +2,8 @@ name: "gatk4_filterintervals" description: Filters intervals based on annotations and/or count statistics. keywords: - gatk4 - - gatk4_filterintervals + - interval_list + - filterintervals tools: - "gatk4": description: @@ -16,6 +17,15 @@ tools: input: - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - intervals: + type: file + description: Processed interval list file (processed_intervals.interval_list) + pattern: "*.interval_list" + - meta2: type: map description: | Groovy Map containing sample information @@ -24,10 +34,11 @@ input: type: file description: Read counts input file pattern: "*.{tsv, hdf5}" - - intervals: - type: file - description: Processed interval list file (processed_intervals.interval_list) - pattern: "*.interval_list" + - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - annotated_intervals: type: file description: Annotated intervals TSV file (annotated_intervals.tsv). 
From 0bbd4e2ecf57735654edca8a1390c0a3baf65cee Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 6 Sep 2023 12:41:11 +0200 Subject: [PATCH 029/234] plug annotate and filterintervals into subworkflow --- conf/modules/germlinecnvcaller.config | 10 +++++++ .../local/germlinecnvcaller_cohort.nf | 26 +++++++++++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/conf/modules/germlinecnvcaller.config b/conf/modules/germlinecnvcaller.config index 5bf4498..cd005fd 100644 --- a/conf/modules/germlinecnvcaller.config +++ b/conf/modules/germlinecnvcaller.config @@ -24,4 +24,14 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + + withName: GATK4_ANNOTATEINTERVALS { + ext.args = {"--imr OVERLAPPING_ONLY"} + ext.prefix = {" ${meta.id}_annotated"} + } + + withName: GATK4_FILTERINTERVALS { + ext.args = {"--imr OVERLAPPING_ONLY"} + ext.prefix = {" ${meta.id}_filtered"} + } } diff --git a/subworkflows/local/germlinecnvcaller_cohort.nf b/subworkflows/local/germlinecnvcaller_cohort.nf index e7537d4..59a2710 100644 --- a/subworkflows/local/germlinecnvcaller_cohort.nf +++ b/subworkflows/local/germlinecnvcaller_cohort.nf @@ -1,4 +1,6 @@ +include { GATK4_ANNOTATEINTERVALS } from '../../modules/nf-core/gatk4/annotateintervals/main' include { GATK4_COLLECTREADCOUNTS } from '../../modules/nf-core/gatk4/collectreadcounts/main' +include { GATK4_FILTERINTERVALS } from '../../modules/nf-core/gatk4/filterintervals/main' include { GATK4_PREPROCESSINTERVALS } from '../../modules/nf-core/gatk4/preprocessintervals/main' include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main' include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' @@ -18,13 +20,33 @@ workflow GERMLINECNVCALLER_COHORT { ch_bam_bai = ch_bam.join(SAMTOOLS_INDEX.out.bai) - GATK4_PREPROCESSINTERVALS (ch_fasta, ch_fai, ch_dict, [[:],[]], [[:],[]]) + 
GATK4_PREPROCESSINTERVALS (ch_fasta, + ch_fai, + ch_dict, + [[:],[]], [[:],[]]) ch_bam_bai .combine(GATK4_PREPROCESSINTERVALS.out.interval_list.map{it -> it[1]}) .set {ch_readcounts_in} - GATK4_COLLECTREADCOUNTS(ch_readcounts_in, ch_fasta, ch_fai, ch_dict) + GATK4_COLLECTREADCOUNTS (ch_readcounts_in, + ch_fasta, + ch_fai, + ch_dict) + .tsv + .collect{it[1]} + .map {tsvs -> [[id:'cohort'],tsvs]} + .set { ch_readcounts_out } + + GATK4_ANNOTATEINTERVALS (GATK4_PREPROCESSINTERVALS.out.interval_list, + ch_fasta, + ch_fai, + ch_dict, + [[:],[]], [[:],[]], [[:],[]], [[:],[]]) + + GATK4_FILTERINTERVALS (GATK4_PREPROCESSINTERVALS.out.interval_list, + ch_readcounts_out, + GATK4_ANNOTATEINTERVALS.out.annotated_intervals) ch_versions = ch_versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) From 13ab07af1e606f1971dca10ab0598349be0a894e Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 6 Sep 2023 13:14:54 +0200 Subject: [PATCH 030/234] add modules --- modules.json | 15 ++++ .../determinegermlinecontigploidy/main.nf | 74 ++++++++++++++++++ .../determinegermlinecontigploidy/meta.yml | 75 +++++++++++++++++++ .../nf-core/gatk4/germlinecnvcaller/main.nf | 69 +++++++++++++++++ .../nf-core/gatk4/germlinecnvcaller/meta.yml | 62 +++++++++++++++ .../gatk4/postprocessgermlinecnvcalls/main.nf | 68 +++++++++++++++++ .../postprocessgermlinecnvcalls/meta.yml | 65 ++++++++++++++++ 7 files changed, 428 insertions(+) create mode 100644 modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf create mode 100644 modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml create mode 100644 modules/nf-core/gatk4/germlinecnvcaller/main.nf create mode 100644 modules/nf-core/gatk4/germlinecnvcaller/meta.yml create mode 100644 modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf create mode 100644 modules/nf-core/gatk4/postprocessgermlinecnvcalls/meta.yml diff --git a/modules.json b/modules.json index b9bec7d..79139e4 100644 --- 
a/modules.json +++ b/modules.json @@ -25,11 +25,26 @@ "git_sha": "d25bf48327e86a7f737047a57ec264b90e22ce3d", "installed_by": ["modules"] }, + "gatk4/determinegermlinecontigploidy": { + "branch": "master", + "git_sha": "8c4542e5d421c4690cf1fa6ec729e9304763fdaf", + "installed_by": ["modules"] + }, "gatk4/filterintervals": { "branch": "master", "git_sha": "016397249f05f5af7b97e3ea8d64458a07df2928", "installed_by": ["modules"] }, + "gatk4/germlinecnvcaller": { + "branch": "master", + "git_sha": "8c4542e5d421c4690cf1fa6ec729e9304763fdaf", + "installed_by": ["modules"] + }, + "gatk4/postprocessgermlinecnvcalls": { + "branch": "master", + "git_sha": "8c4542e5d421c4690cf1fa6ec729e9304763fdaf", + "installed_by": ["modules"] + }, "gatk4/preprocessintervals": { "branch": "master", "git_sha": "1226419498a14d17f98d12d6488d333b0dbd0418", diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf b/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf new file mode 100644 index 0000000..3c21e74 --- /dev/null +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf @@ -0,0 +1,74 @@ + +process GATK4_DETERMINEGERMLINECONTIGPLOIDY { + tag "$meta.id" + label 'process_single' + + //Conda is not supported at the moment: https://github.com/broadinstitute/gatk/issues/7811 + container "nf-core/gatk:4.4.0.0" //Biocontainers is missing a package + + input: + tuple val(meta), path(counts), path(bed), path(exclude_beds) + tuple val(meta2), path(ploidy_model) + path(contig_ploidy_table) + + output: + tuple val(meta), path("${prefix}-calls"), emit: calls + tuple val(meta), path("${prefix}-model"), emit: model, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "GATK4_DETERMINEGERMLINECONTIGPLOIDY module does not support Conda. 
Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def intervals = bed ? "--intervals ${bed}" : "" + def exclude = exclude_beds ? exclude_beds.collect(){"--exclude-intervals $it"}.join(" ") : "" + def contig_ploidy = contig_ploidy_table ? "--contig-ploidy-priors ${contig_ploidy_table}" : "" + def model = ploidy_model ? "--model ${ploidy_model}" : "" + def input_list = counts.collect(){"--input $it"}.join(" ") + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK DetermineGermlineContigPloidy] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" DetermineGermlineContigPloidy \\ + ${input_list} \\ + --output ./ \\ + --output-prefix ${prefix} \\ + ${intervals} \\ + ${exclude} \\ + ${contig_ploidy} \\ + ${model} \\ + --tmp-dir . \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "GATK4_DETERMINEGERMLINECONTIGPLOIDY module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}-calls + touch ${prefix}-model + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml b/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml new file mode 100644 index 0000000..667d622 --- /dev/null +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml @@ -0,0 +1,75 @@ +name: "gatk4_determinegermlinecontigploidy" +description: Determines the baseline contig ploidy for germline samples given counts data +keywords: + - gatk4 + - determinegermlinecontigploidy + - counts + - copy number +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - counts: + type: file + description: One or more count TSV files created with gatk/collectreadcounts + pattern: "*.tsv" + - bed: + type: file + description: Optional - A bed file containing the intervals to include in the process + pattern: "*.bed" + - exclude_beds: + type: file + description: Optional - One or more bed files containing intervals to exclude from the process + pattern: "*.bed" + - contig_ploidy_table: + type: file + description: The contig ploidy priors table + pattern: "*.tsv" + - ploidy_model: + type: directory + description: | + Optional - A folder containing the ploidy model. + When a model is supplied, the tool will run in CASE mode. + pattern: '*-model/' + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - calls: + type: directory + description: A folder containing the calls from the input files + pattern: "*-calls/" + - model: + type: directory + description: | + A folder containing the model from the input files. + This will only be created in COHORT mode (when no model is supplied to the process). 
+ pattern: "*-model/" + +authors: + - "@nvnieuwk" diff --git a/modules/nf-core/gatk4/germlinecnvcaller/main.nf b/modules/nf-core/gatk4/germlinecnvcaller/main.nf new file mode 100644 index 0000000..973a0d3 --- /dev/null +++ b/modules/nf-core/gatk4/germlinecnvcaller/main.nf @@ -0,0 +1,69 @@ +process GATK4_GERMLINECNVCALLER { + tag "$meta.id" + label 'process_single' + + //Conda is not supported at the moment: https://github.com/broadinstitute/gatk/issues/7811 + container "nf-core/gatk:4.4.0.0" //Biocontainers is missing a package + + input: + tuple val(meta), path(tsv), path(intervals), path(ploidy), path(model) + + output: + tuple val(meta), path("*-cnv-calls/*-calls"), emit: calls, optional: true + tuple val(meta), path("*-cnv-model/*-model"), emit: model, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "GATK4_GERMLINECNVCALLER module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def intervals_command = intervals ? "--intervals ${intervals}" : "" + def ploidy_command = ploidy ? "--contig-ploidy-calls ${ploidy}" : "" + def model_command = model ? "--model ${model}" : "" + def input_list = tsv.collect{"--input $it"}.join(' ') + def output_command = model ? "--output ${prefix}-cnv-calls" : "--output ${prefix}-cnv-model" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK GermlineCNVCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" GermlineCNVCaller \\ + $input_list \\ + $ploidy_command \\ + $output_command \\ + --output-prefix $prefix \\ + $args \\ + $intervals_command \\ + $model_command + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "GATK4_GERMLINECNVCALLER module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p ${prefix}-cnv-calls/${prefix}-calls + mkdir -p ${prefix}-cnv-model/${prefix}-model + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/germlinecnvcaller/meta.yml b/modules/nf-core/gatk4/germlinecnvcaller/meta.yml new file mode 100644 index 0000000..b743092 --- /dev/null +++ b/modules/nf-core/gatk4/germlinecnvcaller/meta.yml @@ -0,0 +1,62 @@ +name: "gatk4_germlinecnvcaller" +description: Calls copy-number variants in germline samples given their counts and the output of DetermineGermlineContigPloidy. +keywords: + - gatk + - gatk4_germlinecnvcaller + - germline contig ploidy +tools: + - "gatk4": + description: + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tsv: + type: file + description: One or more count TSV files created with gatk/collectreadcounts + pattern: "*.tsv" + - intervals: + type: file + description: Optional - A bed file containing the intervals to include in the process + pattern: "*.bed" + - model: + type: directory + description: Optional - directory containing the model produced by germlinecnvcaller cohort mode + pattern: "*-cnv-model/*-model" + - ploidy: + type: file + description: Directory containing ploidy calls produced by determinegermlinecontigploidy case or cohort mode + pattern: "*-calls" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - calls: + type: file + description: Tar gzipped directory containing calls produced by germlinecnvcaller case mode + pattern: "*-cnv-calls/*-calls" + - model: + type: directory + description: Optional - Tar gzipped directory containing the model produced by germlinecnvcaller cohort mode + pattern: "*-cnv-model/*-model" + +authors: + - "@ryanjameskennedy" + - "@ViktorHy" diff --git a/modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf b/modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf new file mode 100644 index 0000000..d622304 --- /dev/null +++ b/modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf @@ -0,0 +1,68 @@ +process GATK4_POSTPROCESSGERMLINECNVCALLS { + tag "$meta.id" + label 'process_single' + + //Conda is not supported at the moment: https://github.com/broadinstitute/gatk/issues/7811 + container "nf-core/gatk:4.4.0.0" //Biocontainers is missing a package + + input: + tuple val(meta), path(calls), path(model), path(ploidy) + + output: + tuple val(meta), path("*_genotyped_intervals.vcf.gz") , emit: intervals, optional: true + tuple val(meta), path("*_genotyped_segments.vcf.gz") , emit: segments, optional: true + tuple val(meta), path("*_denoised.vcf.gz") , emit: denoised, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "GATK4_POSTPROCESSGERMLINECNVCALLS module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def calls_command = calls ? calls.collect{"--calls-shard-path $it"}.join(' ') : "" + def model_command = model ? 
model.collect{"--model-shard-path $it"}.join(' ') : "" + def ploidy_command = ploidy ? "--contig-ploidy-calls ${ploidy}" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK PostprocessGermlineCNVCalls] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M" PostprocessGermlineCNVCalls \\ + $calls_command \\ + $model_command \\ + $ploidy_command \\ + --output-genotyped-intervals ${prefix}_genotyped_intervals.vcf.gz \\ + --output-genotyped-segments ${prefix}_genotyped_segments.vcf.gz \\ + --output-denoised-copy-ratios ${prefix}_denoised.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "GATK4_POSTPROCESSGERMLINECNVCALLS module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_genotyped_intervals.vcf.gz + touch ${prefix}_genotyped_segments.vcf.gz + touch ${prefix}_denoised.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/postprocessgermlinecnvcalls/meta.yml b/modules/nf-core/gatk4/postprocessgermlinecnvcalls/meta.yml new file mode 100644 index 0000000..92e06ca --- /dev/null +++ b/modules/nf-core/gatk4/postprocessgermlinecnvcalls/meta.yml @@ -0,0 +1,65 @@ +name: "gatk4_postprocessgermlinecnvcalls" +description: Postprocesses the output of GermlineCNVCaller and generates VCFs and denoised copy ratios +keywords: + - gatk4 + - postprocessgermlinecnvcalls + - copy number +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037593411-PostprocessGermlineCNVCalls + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ploidy: + type: directory + description: | + Optional - A folder containing the ploidy model. + When a model is supplied, the tool will run in CASE mode. + pattern: "*-calls/" + - calls: + type: directory + description: A folder containing the calls from the input files + pattern: "*-cnv-calls/*-calls" + - model: + type: directory + description: | + A folder containing the model from the input files. 
+ This will only be created in COHORT mode (when no model is supplied to the process). + pattern: "*-cnv-model/*-model" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - denoised: + type: file + description: Denoised copy ratio file + pattern: "*.vcf.gz" + - segments: + type: file + description: Segments VCF file + pattern: "*.vcf.gz" + - intervals: + type: file + description: Intervals VCF file + pattern: "*.vcf.gz" + +authors: + - "@ryanjameskennedy" From 982ebde34aab40c3308ffbfe8dc988d9c4c61378 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 6 Sep 2023 13:39:53 +0200 Subject: [PATCH 031/234] add determinecontigploidy --- conf/modules/germlinecnvcaller.config | 4 ++ conf/test.config | 1 + nextflow_schema.json | 8 ++++ .../local/germlinecnvcaller_cohort.nf | 43 ++++++++++++++----- workflows/createpanelrefs.nf | 12 +++--- 5 files changed, 50 insertions(+), 18 deletions(-) diff --git a/conf/modules/germlinecnvcaller.config b/conf/modules/germlinecnvcaller.config index cd005fd..2fb9dbd 100644 --- a/conf/modules/germlinecnvcaller.config +++ b/conf/modules/germlinecnvcaller.config @@ -34,4 +34,8 @@ process { ext.args = {"--imr OVERLAPPING_ONLY"} ext.prefix = {" ${meta.id}_filtered"} } + + withName: GATK4_DETERMINEGERMLINECONTIGPLOIDY { + ext.args = {"--imr OVERLAPPING_ONLY"} + } } diff --git a/conf/test.config b/conf/test.config index cd90561..c57c73c 100644 --- a/conf/test.config +++ b/conf/test.config @@ -29,4 +29,5 @@ params { genome = null igenomes_ignore = true fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" + ploidy_priors = 
"https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" } diff --git a/nextflow_schema.json b/nextflow_schema.json index 99050ca..93c4ec5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -103,6 +103,14 @@ "fa_icon": "fas fa-ban", "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + }, + "ploidy_priors": { + "type": "string", + "exists": true, + "format": "file-path", + "mimetype": "text/plain", + "description": "Path to a file containing ploidy priors table.", + "fa_icon": "fas fa-file" } } }, diff --git a/subworkflows/local/germlinecnvcaller_cohort.nf b/subworkflows/local/germlinecnvcaller_cohort.nf index 59a2710..1add4b0 100644 --- a/subworkflows/local/germlinecnvcaller_cohort.nf +++ b/subworkflows/local/germlinecnvcaller_cohort.nf @@ -1,15 +1,19 @@ -include { GATK4_ANNOTATEINTERVALS } from '../../modules/nf-core/gatk4/annotateintervals/main' -include { GATK4_COLLECTREADCOUNTS } from '../../modules/nf-core/gatk4/collectreadcounts/main' -include { GATK4_FILTERINTERVALS } from '../../modules/nf-core/gatk4/filterintervals/main' -include { GATK4_PREPROCESSINTERVALS } from '../../modules/nf-core/gatk4/preprocessintervals/main' -include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main' -include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { GATK4_ANNOTATEINTERVALS } from '../../modules/nf-core/gatk4/annotateintervals/main' +include { GATK4_COLLECTREADCOUNTS } from '../../modules/nf-core/gatk4/collectreadcounts/main' +include { GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../modules/nf-core/gatk4/determinegermlinecontigploidy/main' +include { GATK4_FILTERINTERVALS 
} from '../../modules/nf-core/gatk4/filterintervals/main' +include { GATK4_GERMLINECNVCALLER } from '../../modules/nf-core/gatk4/germlinecnvcaller/main' +include { GATK4_POSTPROCESSGERMLINECNVCALLS } from '../../modules/nf-core/gatk4/postprocessgermlinecnvcalls/main' +include { GATK4_PREPROCESSINTERVALS } from '../../modules/nf-core/gatk4/preprocessintervals/main' +include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main' +include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' workflow GERMLINECNVCALLER_COHORT { take: - ch_bam // channel: [mandatory] [ val(meta), [path(bam)] ] - ch_fasta // channel: [mandatory] [ val(meta), [path(fasta)] ] + ch_bam // channel: [mandatory] [ val(meta), [path(bam)] ] + ch_fasta // channel: [mandatory] [ val(meta), [path(fasta)] ] + ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] main: ch_versions = Channel.empty() @@ -34,8 +38,8 @@ workflow GERMLINECNVCALLER_COHORT { ch_fai, ch_dict) .tsv - .collect{it[1]} - .map {tsvs -> [[id:'cohort'],tsvs]} + .collect { it[1] } + .map {tsv -> [[id:'cohort'],tsv]} .set { ch_readcounts_out } GATK4_ANNOTATEINTERVALS (GATK4_PREPROCESSINTERVALS.out.interval_list, @@ -48,7 +52,24 @@ workflow GERMLINECNVCALLER_COHORT { ch_readcounts_out, GATK4_ANNOTATEINTERVALS.out.annotated_intervals) + ch_readcounts_out + .combine(GATK4_FILTERINTERVALS.out.interval_list) + .map{ meta, counts, meta2, il -> [meta, counts, il, []] } + .set {ch_contigploidy_in} + + GATK4_DETERMINEGERMLINECONTIGPLOIDY (ch_contigploidy_in, + [[:],[]], + ch_ploidy_priors) + + + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + ch_versions = ch_versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) ch_versions = ch_versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) + ch_versions = 
ch_versions.mix(GATK4_COLLECTREADCOUNTS.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_ANNOTATEINTERVALS.out.versions) + ch_versions = ch_versions.mix(GATK4_FILTERINTERVALS.out.versions) + ch_versions = ch_versions.mix(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.versions) emit: versions = ch_versions diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 72ce6d8..a76322f 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -43,7 +43,8 @@ ch_input = ch_from_samplesheet.map{meta, bam, bai, cram, crai -> } // Initialize file channels based on params, defined in the params.genomes[params.genome] scope -ch_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it.baseName], it] }.collect() +ch_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it.baseName], it] }.collect() +ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() : Channel.empty() /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -66,6 +67,8 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // +include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvcaller_cohort' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-CORE MODULES/SUBWORKFLOWS @@ -80,11 +83,6 @@ include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch/ma include { MULTIQC } from '../modules/nf-core/multiqc/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' -// -// SUBWORKFLOW imports -// - -include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvcaller_cohort' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -114,7 +112,7 @@ workflow CREATEPANELREFS { if (params.tools && params.tools.split(',').contains('germlinecnvcaller')) { - GERMLINECNVCALLER_COHORT(ch_input, ch_fasta) + GERMLINECNVCALLER_COHORT(ch_input, ch_fasta, ch_ploidy_priors) ch_versions = ch_versions.mix(GERMLINECNVCALLER_COHORT.out.versions) } From f47f555b568d9b3fb9c0c300595917691b51447d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 7 Sep 2023 23:13:05 +0200 Subject: [PATCH 032/234] first draft --- conf/modules/germlinecnvcaller.config | 20 +++++ modules.json | 6 +- .../nf-core/gatk4/germlinecnvcaller/main.nf | 6 +- .../nf-core/gatk4/germlinecnvcaller/meta.yml | 14 ++-- .../nf-core/gatk4/intervallisttools/main.nf | 73 +++++++++++++++++++ .../nf-core/gatk4/intervallisttools/meta.yml | 47 ++++++++++++ .../gatk4/postprocessgermlinecnvcalls/main.nf | 68 ----------------- .../postprocessgermlinecnvcalls/meta.yml | 65 ----------------- .../local/germlinecnvcaller_cohort.nf | 21 +++++- 9 files changed, 174 insertions(+), 146 deletions(-) create mode 100644 modules/nf-core/gatk4/intervallisttools/main.nf create mode 100644 modules/nf-core/gatk4/intervallisttools/meta.yml delete mode 100644 
modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf delete mode 100644 modules/nf-core/gatk4/postprocessgermlinecnvcalls/meta.yml diff --git a/conf/modules/germlinecnvcaller.config b/conf/modules/germlinecnvcaller.config index 2fb9dbd..16ee380 100644 --- a/conf/modules/germlinecnvcaller.config +++ b/conf/modules/germlinecnvcaller.config @@ -35,7 +35,27 @@ process { ext.prefix = {" ${meta.id}_filtered"} } + withName: GATK4_INTERVALLISTTOOLS { + ext.args = {"--SUBDIVISION_MODE INTERVAL_COUNT --SCATTER_CONTENT 2"} + } + withName: GATK4_DETERMINEGERMLINECONTIGPLOIDY { ext.args = {"--imr OVERLAPPING_ONLY"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/germlinecnvcaller/determinegermlinecontigploidy" }, + pattern: "*-model", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } + + withName: GATK4_GERMLINECNVCALLER { + ext.args = {"--imr OVERLAPPING_ONLY --run-mode COHORT"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/germlinecnvcaller/germlinecnvcaller" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } diff --git a/modules.json b/modules.json index 79139e4..118f42b 100644 --- a/modules.json +++ b/modules.json @@ -37,12 +37,12 @@ }, "gatk4/germlinecnvcaller": { "branch": "master", - "git_sha": "8c4542e5d421c4690cf1fa6ec729e9304763fdaf", + "git_sha": "16bda00336e449b83d9b62abaa614f3880664ffb", "installed_by": ["modules"] }, - "gatk4/postprocessgermlinecnvcalls": { + "gatk4/intervallisttools": { "branch": "master", - "git_sha": "8c4542e5d421c4690cf1fa6ec729e9304763fdaf", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "gatk4/preprocessintervals": { diff --git a/modules/nf-core/gatk4/germlinecnvcaller/main.nf b/modules/nf-core/gatk4/germlinecnvcaller/main.nf index 973a0d3..fed285c 100644 --- a/modules/nf-core/gatk4/germlinecnvcaller/main.nf +++ b/modules/nf-core/gatk4/germlinecnvcaller/main.nf @@ -9,8 +9,9 @@ process GATK4_GERMLINECNVCALLER { tuple val(meta), path(tsv), path(intervals), path(ploidy), path(model) output: - tuple val(meta), path("*-cnv-calls/*-calls"), emit: calls, optional: true - tuple val(meta), path("*-cnv-model/*-model"), emit: model, optional: true + tuple val(meta), path("*-cnv-model/*-calls"), emit: cohortcalls, optional: true + tuple val(meta), path("*-cnv-model/*-model"), emit: cohortmodel, optional: true + tuple val(meta), path("*-cnv-calls/*-calls"), emit: casecalls , optional: true path "versions.yml" , emit: versions when: @@ -60,6 +61,7 @@ process GATK4_GERMLINECNVCALLER { """ mkdir -p ${prefix}-cnv-calls/${prefix}-calls mkdir -p ${prefix}-cnv-model/${prefix}-model + mkdir -p ${prefix}-cnv-model/${prefix}-calls cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gatk4/germlinecnvcaller/meta.yml b/modules/nf-core/gatk4/germlinecnvcaller/meta.yml index b743092..36cd527 100644 --- a/modules/nf-core/gatk4/germlinecnvcaller/meta.yml +++ b/modules/nf-core/gatk4/germlinecnvcaller/meta.yml @@ -34,7 +34,7 @@ input: description: Optional - 
directory containing the model produced by germlinecnvcaller cohort mode pattern: "*-cnv-model/*-model" - ploidy: - type: file + type: directory description: Directory containing ploidy calls produced by determinegermlinecontigploidy case or cohort mode pattern: "*-calls" @@ -48,14 +48,18 @@ output: type: file description: File containing software versions pattern: "versions.yml" - - calls: - type: file + - cohortcalls: + type: directory description: Tar gzipped directory containing calls produced by germlinecnvcaller case mode - pattern: "*-cnv-calls/*-calls" - - model: + pattern: "*-cnv-model/*-calls" + - cohortmodel: type: directory description: Optional - Tar gzipped directory containing the model produced by germlinecnvcaller cohort mode pattern: "*-cnv-model/*-model" + - casecalls: + type: directory + description: Tar gzipped directory containing calls produced by germlinecnvcaller case mode + pattern: "*-cnv-calls/*-calls" authors: - "@ryanjameskennedy" diff --git a/modules/nf-core/gatk4/intervallisttools/main.nf b/modules/nf-core/gatk4/intervallisttools/main.nf new file mode 100644 index 0000000..0054659 --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttools/main.nf @@ -0,0 +1,73 @@ +process GATK4_INTERVALLISTTOOLS { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::gatk4=4.4.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(intervals) + + output: + tuple val(meta), path("*_split/*/*.interval_list"), emit: interval_list + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK IntervalListTools] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + + mkdir ${prefix}_split + + gatk --java-options "-Xmx${avail_mem}M" IntervalListTools \\ + --INPUT $intervals \\ + --OUTPUT ${prefix}_split \\ + --TMP_DIR . \\ + $args + + python3 <<CODE + import glob, os + # The following python code snippet rename the output files into different name to avoid overwriting or name conflict + intervals = sorted(glob.glob("*_split/*/*.interval_list")) + for i, interval in enumerate(intervals): + (directory, filename) = os.path.split(interval) + newName = os.path.join(directory, str(i + 1) + filename) + os.rename(interval, newName) + CODE + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p ${prefix}_split/temp_0001_of_6 + mkdir -p ${prefix}_split/temp_0002_of_6 + mkdir -p ${prefix}_split/temp_0003_of_6 + mkdir -p ${prefix}_split/temp_0004_of_6 + touch ${prefix}_split/temp_0001_of_6/1scattered.interval_list + touch ${prefix}_split/temp_0002_of_6/2scattered.interval_list + touch ${prefix}_split/temp_0003_of_6/3scattered.interval_list + touch ${prefix}_split/temp_0004_of_6/4scattered.interval_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/intervallisttools/meta.yml b/modules/nf-core/gatk4/intervallisttools/meta.yml new file mode 100644 index 0000000..804645f --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttools/meta.yml @@ -0,0 +1,47 @@ +name: gatk4_intervallisttools + +description: Splits the interval list file into unique, 
equally-sized interval files and place it under a directory +keywords: + - sort + - bed + - interval list +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + + - interval_list: + type: file + description: Interval list file + pattern: "*.interval_list" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - interval_list: + type: file + description: Interval list files + pattern: "*.interval_list" + +authors: + - "@praveenraj2018" diff --git a/modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf b/modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf deleted file mode 100644 index d622304..0000000 --- a/modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf +++ /dev/null @@ -1,68 +0,0 @@ -process GATK4_POSTPROCESSGERMLINECNVCALLS { - tag "$meta.id" - label 'process_single' - - //Conda is not supported at the moment: https://github.com/broadinstitute/gatk/issues/7811 - container "nf-core/gatk:4.4.0.0" //Biocontainers is missing a package - - input: - tuple val(meta), path(calls), path(model), path(ploidy) - - output: - tuple val(meta), path("*_genotyped_intervals.vcf.gz") , emit: intervals, optional: true - tuple val(meta), path("*_genotyped_segments.vcf.gz") , emit: segments, 
optional: true - tuple val(meta), path("*_denoised.vcf.gz") , emit: denoised, optional: true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "GATK4_POSTPROCESSGERMLINECNVCALLS module does not support Conda. Please use Docker / Singularity / Podman instead." - } - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def calls_command = calls ? calls.collect{"--calls-shard-path $it"}.join(' ') : "" - def model_command = model ? model.collect{"--model-shard-path $it"}.join(' ') : "" - def ploidy_command = ploidy ? "--contig-ploidy-calls ${ploidy}" : "" - - def avail_mem = 3072 - if (!task.memory) { - log.info '[GATK GermlineCNVCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() - } - """ - gatk --java-options "-Xmx${avail_mem}g" PostprocessGermlineCNVCalls \\ - $calls_command \\ - $model_command \\ - $ploidy_command \\ - --output-genotyped-intervals ${prefix}_genotyped_intervals.vcf.gz \\ - --output-genotyped-segments ${prefix}_genotyped_segments.vcf.gz \\ - --output-denoised-copy-ratios ${prefix}_denoised.vcf.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ - - stub: - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "GATK4_POSTPROCESSGERMLINECNVCALLS module does not support Conda. Please use Docker / Singularity / Podman instead." 
- } - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}_genotyped_intervals.vcf.gz - touch ${prefix}_genotyped_segments.vcf.gz - touch ${prefix}_denoised.vcf.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/gatk4/postprocessgermlinecnvcalls/meta.yml b/modules/nf-core/gatk4/postprocessgermlinecnvcalls/meta.yml deleted file mode 100644 index 92e06ca..0000000 --- a/modules/nf-core/gatk4/postprocessgermlinecnvcalls/meta.yml +++ /dev/null @@ -1,65 +0,0 @@ -name: "gatk4_postprocessgermlinecnvcalls" -description: Postprocesses the output of GermlineCNVCaller and generates VCFs and denoised copy ratios -keywords: - - gatk4 - - postprocessgermlinecnvcalls - - copy number -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037593411-PostprocessGermlineCNVCalls - doi: 10.1158/1538-7445.AM2017-3590 - licence: ["Apache-2.0"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - ploidy: - type: directory - description: | - Optional - A folder containing the ploidy model. - When a model is supplied to tool will run in CASE mode. - pattern: "*-calls/" - - calls: - type: directory - description: A folder containing the calls from the input files - pattern: "*-cnv-calls/*-calls" - - model: - type: directory - description: | - A folder containing the model from the input files. 
- This will only be created in COHORT mode (when no model is supplied to the process). - pattern: "*-cnv-model/*-model" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - denoised: - type: file - description: Denoised copy ratio file - pattern: "*.vcf.gz" - - segments: - type: file - description: Segments VCF file - pattern: "*.vcf.gz" - - intervals: - type: file - description: Intervals VCF file - pattern: "*.vcf.gz" - -authors: - - "@ryanjameskennedy" diff --git a/subworkflows/local/germlinecnvcaller_cohort.nf b/subworkflows/local/germlinecnvcaller_cohort.nf index 1add4b0..0fb1ae8 100644 --- a/subworkflows/local/germlinecnvcaller_cohort.nf +++ b/subworkflows/local/germlinecnvcaller_cohort.nf @@ -3,7 +3,7 @@ include { GATK4_COLLECTREADCOUNTS } from '../../modules/nf-core/gatk include { GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../modules/nf-core/gatk4/determinegermlinecontigploidy/main' include { GATK4_FILTERINTERVALS } from '../../modules/nf-core/gatk4/filterintervals/main' include { GATK4_GERMLINECNVCALLER } from '../../modules/nf-core/gatk4/germlinecnvcaller/main' -include { GATK4_POSTPROCESSGERMLINECNVCALLS } from '../../modules/nf-core/gatk4/postprocessgermlinecnvcalls/main' +include { GATK4_INTERVALLISTTOOLS } from '../../modules/nf-core/gatk4/intervallisttools/main' include { GATK4_PREPROCESSINTERVALS } from '../../modules/nf-core/gatk4/preprocessintervals/main' include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main' include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' @@ -52,24 +52,39 @@ workflow GERMLINECNVCALLER_COHORT { ch_readcounts_out, GATK4_ANNOTATEINTERVALS.out.annotated_intervals) + GATK4_INTERVALLISTTOOLS(GATK4_FILTERINTERVALS.out.interval_list) + .interval_list + .map {meta, it -> 
it} + .flatten() + .set { ch_intervallist_out } + ch_readcounts_out .combine(GATK4_FILTERINTERVALS.out.interval_list) - .map{ meta, counts, meta2, il -> [meta, counts, il, []] } + .map{ meta, counts, meta2, il -> [meta, counts, il, []] } .set {ch_contigploidy_in} GATK4_DETERMINEGERMLINECONTIGPLOIDY (ch_contigploidy_in, [[:],[]], ch_ploidy_priors) + ch_readcounts_out + .combine(ch_intervallist_out) + .combine(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.calls) + .map{ meta, counts, il, meta2, calls -> [meta + [id:il.baseName], counts, il, calls, []] } + .set {ch_cnvcaller_in} + + GATK4_GERMLINECNVCALLER (ch_cnvcaller_in) ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) ch_versions = ch_versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) ch_versions = ch_versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) ch_versions = ch_versions.mix(GATK4_COLLECTREADCOUNTS.out.versions.first()) ch_versions = ch_versions.mix(GATK4_ANNOTATEINTERVALS.out.versions) ch_versions = ch_versions.mix(GATK4_FILTERINTERVALS.out.versions) + ch_versions = ch_versions.mix(GATK4_INTERVALLISTTOOLS.out.versions) ch_versions = ch_versions.mix(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.versions) + ch_versions = ch_versions.mix(GATK4_GERMLINECNVCALLER.out.versions.first()) emit: versions = ch_versions From 3f890328d2ef38397b5bb7f916d533f251339d5e Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 8 Sep 2023 14:42:45 +0200 Subject: [PATCH 033/234] schema_update --- conf/modules/germlinecnvcaller.config | 15 +++-- nextflow_schema.json | 79 ++++++++++++++++++++++----- 2 files changed, 73 insertions(+), 21 deletions(-) diff --git a/conf/modules/germlinecnvcaller.config b/conf/modules/germlinecnvcaller.config index 16ee380..3ce034d 100644 --- a/conf/modules/germlinecnvcaller.config +++ 
b/conf/modules/germlinecnvcaller.config @@ -13,14 +13,17 @@ process { withName: GATK4_PREPROCESSINTERVALS { - ext.args = {"--imr OVERLAPPING_ONLY"} + ext.args = { ["--imr OVERLAPPING_ONLY", + "--padding ${params.padding}", + "--bin-length ${params.binlength}"].join(" ") + } } withName: GATK4_COLLECTREADCOUNTS { - ext.args = {"--format TSV --imr OVERLAPPING_ONLY"} + ext.args = {"--format ${params.readcount_format} --imr OVERLAPPING_ONLY"} publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/germlinecnvcaller/readcounts" }, + path: { "${params.outdir}/germlinecnvcaller/readcounts" }, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } @@ -36,14 +39,14 @@ process { } withName: GATK4_INTERVALLISTTOOLS { - ext.args = {"--SUBDIVISION_MODE INTERVAL_COUNT --SCATTER_CONTENT 2"} + ext.args = {"--SUBDIVISION_MODE INTERVAL_COUNT --SCATTER_CONTENT ${params.scatter_content}"} } withName: GATK4_DETERMINEGERMLINECONTIGPLOIDY { ext.args = {"--imr OVERLAPPING_ONLY"} publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/germlinecnvcaller/determinegermlinecontigploidy" }, + path: { "${params.outdir}/germlinecnvcaller/determinegermlinecontigploidy" }, pattern: "*-model", saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -53,7 +56,7 @@ process { ext.args = {"--imr OVERLAPPING_ONLY --run-mode COHORT"} publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/germlinecnvcaller/germlinecnvcaller" }, + path: { "${params.outdir}/germlinecnvcaller/germlinecnvcaller" }, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } diff --git a/nextflow_schema.json b/nextflow_schema.json index 93c4ec5..f3d91f5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -5,6 +5,67 @@ "description": "Generate Panel of Normals, models or other similar references from lots of samples", "type": "object", "definitions": { + "germlinecnvcaller_options": { + "title": "Germlinecnvcaller options", + "type": "object", + "description": "Options used by the germlinecnvcaller subworkflow", + "default": "", + "properties": { + "binlength": { + "type": "number", + "default": 1000, + "description": "Length (in bp) of the bins. If zero, no binning will be performed.", + "fa_icon": "fas fa-sort-numeric-down", + "help_text": "Used by GATK's PreprocessIntervals. GATK recommends a binlength of 1000 for WGS analysis, and 0 for WES analysis. " + }, + "mappableregions": { + "type": "string", + "exists": true, + "description": "Path to Umap single-read mappability track in .bed or .bed.gz format. Overlapping intervals must be merged.", + "format": "file-path", + "fa_icon": "fas fa-file", + "help_text": "Used by GATK's AnnotateIntervals." + }, + "padding": { + "type": "number", + "description": "Length (in bp) of the padding regions on each side of the intervals.", + "default": 0, + "fa_icon": "fas fa-sort-numeric-down", + "help_text": "Used by GATK's PreprocessIntervals. GATK recommends a padding of 0 for WGS analysis, and 250 for WES analysis." + }, + "ploidy_priors": { + "type": "string", + "exists": true, + "format": "file-path", + "mimetype": "text/plain", + "description": "Path to a file containing ploidy priors table.", + "fa_icon": "fas fa-file", + "help_text": "Used by GATK's DetermineGermlineContigPloidy." 
+ }, + "readcount_format": { + "type": "string", + "description": "Output file format for count data", + "default": "HDF5", + "fa_icon": "fas fa-align-left", + "enum": ["HDF5", "TSV"] + }, + "scatter_content": { + "type": "number", + "description": "When scattering with this argument, each of the resultant files will (ideally) have this amount of interval-counts.", + "default": 5000, + "fa_icon": "fas fa-sort-numeric-down", + "help_text": "Used by GATK/Picards's IntervalListTools." + }, + "segmentalduplications": { + "type": "string", + "exists": true, + "description": "Path to segmental-duplication track in .bed or .bed.gz format. Overlapping intervals must be merged.", + "format": "file-path", + "fa_icon": "fas fa-file", + "help_text": "Used by GATK's AnnotateIntervals." + } + } + }, "input_output_options": { "title": "Input/output options", "type": "object", @@ -103,14 +164,6 @@ "fa_icon": "fas fa-ban", "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." 
- }, - "ploidy_priors": { - "type": "string", - "exists": true, - "format": "file-path", - "mimetype": "text/plain", - "description": "Path to a file containing ploidy priors table.", - "fa_icon": "fas fa-file" } } }, @@ -208,14 +261,12 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "publish_dir_mode": { @@ -239,7 +290,6 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -254,7 +304,6 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "default": false, "hidden": true }, "hook_url": { @@ -293,7 +342,6 @@ "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", - "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, @@ -301,7 +349,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters fails when an unrecognised parameter is found.", - "default": false, "hidden": true, "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." }, @@ -309,7 +356,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters in lenient more.", - "default": false, "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." 
} @@ -317,6 +363,9 @@ } }, "allOf": [ + { + "$ref": "#/definitions/germlinecnvcaller_options" + }, { "$ref": "#/definitions/input_output_options" }, From 084d8a55bc47967275980639ae08851c951172ab Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 8 Sep 2023 16:17:05 +0200 Subject: [PATCH 034/234] add parameters --- conf/modules/germlinecnvcaller.config | 2 +- conf/test.config | 5 ++++- main.nf | 2 -- nextflow.config | 9 +++++++++ nextflow_schema.json | 8 ++++---- subworkflows/local/germlinecnvcaller_cohort.nf | 4 +++- 6 files changed, 21 insertions(+), 9 deletions(-) diff --git a/conf/modules/germlinecnvcaller.config b/conf/modules/germlinecnvcaller.config index 3ce034d..2b51440 100644 --- a/conf/modules/germlinecnvcaller.config +++ b/conf/modules/germlinecnvcaller.config @@ -15,7 +15,7 @@ process { withName: GATK4_PREPROCESSINTERVALS { ext.args = { ["--imr OVERLAPPING_ONLY", "--padding ${params.padding}", - "--bin-length ${params.binlength}"].join(" ") + "--bin-length ${params.bin_length}"].join(" ") } } diff --git a/conf/test.config b/conf/test.config index c57c73c..41e6bed 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,9 +25,12 @@ params { // Main options tools = 'cnvkit' + //Germlinecnvcaller options + scatter_content = 2 + ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" + // Small reference genome genome = null igenomes_ignore = true fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" - ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" } diff --git a/main.nf b/main.nf index b6f4193..9ab1fea 100644 --- a/main.nf +++ b/main.nf @@ -16,9 +16,7 @@ nextflow.enable.dsl = 2 GENOME PARAMETER VALUES 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VALIDATE & PRINT PARAMETER SUMMARY diff --git a/nextflow.config b/nextflow.config index 0eba204..32e37c2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -19,6 +19,15 @@ params { // Building Panel of Normals and models tools = null // No default, must be specified + // Germlinecnvcaller options + bin_length = 1000 + mappable_regions = null + padding = 0 + ploidy_priors = null + readcount_format = 'HDF5' + scatter_content = 5000 + segmental_duplications = null + // MultiQC options multiqc_config = null multiqc_title = null diff --git a/nextflow_schema.json b/nextflow_schema.json index f3d91f5..8f15b48 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -11,14 +11,14 @@ "description": "Options used by the germlinecnvcaller subworkflow", "default": "", "properties": { - "binlength": { + "bin_length": { "type": "number", "default": 1000, "description": "Length (in bp) of the bins. If zero, no binning will be performed.", "fa_icon": "fas fa-sort-numeric-down", - "help_text": "Used by GATK's PreprocessIntervals. GATK recommends a binlength of 1000 for WGS analysis, and 0 for WES analysis. " + "help_text": "Used by GATK's PreprocessIntervals. GATK recommends a bin length of 1000 for WGS analysis, and 0 for WES analysis. " }, - "mappableregions": { + "mappable_regions": { "type": "string", "exists": true, "description": "Path to Umap single-read mappability track in .bed or .bed.gz format. Overlapping intervals must be merged.", @@ -56,7 +56,7 @@ "fa_icon": "fas fa-sort-numeric-down", "help_text": "Used by GATK/Picards's IntervalListTools." 
}, - "segmentalduplications": { + "segmental_duplications": { "type": "string", "exists": true, "description": "Path to segmental-duplication track in .bed or .bed.gz format. Overlapping intervals must be merged.", diff --git a/subworkflows/local/germlinecnvcaller_cohort.nf b/subworkflows/local/germlinecnvcaller_cohort.nf index 0fb1ae8..558c2d2 100644 --- a/subworkflows/local/germlinecnvcaller_cohort.nf +++ b/subworkflows/local/germlinecnvcaller_cohort.nf @@ -37,7 +37,9 @@ workflow GERMLINECNVCALLER_COHORT { ch_fasta, ch_fai, ch_dict) - .tsv + + GATK4_COLLECTREADCOUNTS.out.tsv + .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) .collect { it[1] } .map {tsv -> [[id:'cohort'],tsv]} .set { ch_readcounts_out } From 813513534b01154853295e87fa472f6954c08471 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 8 Sep 2023 16:23:44 +0200 Subject: [PATCH 035/234] update publishDir rules --- conf/modules/germlinecnvcaller.config | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/conf/modules/germlinecnvcaller.config b/conf/modules/germlinecnvcaller.config index 2b51440..808951b 100644 --- a/conf/modules/germlinecnvcaller.config +++ b/conf/modules/germlinecnvcaller.config @@ -12,6 +12,12 @@ process { + withName: '.*GERMLINECNVCALLER_COHORT.*' { + publishDir = [ + enabled: false + ] + } + withName: GATK4_PREPROCESSINTERVALS { ext.args = { ["--imr OVERLAPPING_ONLY", "--padding ${params.padding}", From d64e849990d5c770c7f230c1fc20d9863caa6305 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 8 Sep 2023 16:34:06 +0200 Subject: [PATCH 036/234] update config --- conf/modules/germlinecnvcaller.config | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/conf/modules/germlinecnvcaller.config b/conf/modules/germlinecnvcaller.config index 808951b..ff73a73 100644 --- a/conf/modules/germlinecnvcaller.config +++ b/conf/modules/germlinecnvcaller.config @@ 
-18,14 +18,14 @@ process { ] } - withName: GATK4_PREPROCESSINTERVALS { + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS' { ext.args = { ["--imr OVERLAPPING_ONLY", "--padding ${params.padding}", "--bin-length ${params.bin_length}"].join(" ") } } - withName: GATK4_COLLECTREADCOUNTS { + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_COLLECTREADCOUNTS' { ext.args = {"--format ${params.readcount_format} --imr OVERLAPPING_ONLY"} publishDir = [ mode: params.publish_dir_mode, @@ -34,21 +34,21 @@ process { ] } - withName: GATK4_ANNOTATEINTERVALS { + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_ANNOTATEINTERVALS' { ext.args = {"--imr OVERLAPPING_ONLY"} ext.prefix = {" ${meta.id}_annotated"} } - withName: GATK4_FILTERINTERVALS { + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_FILTERINTERVALS' { ext.args = {"--imr OVERLAPPING_ONLY"} ext.prefix = {" ${meta.id}_filtered"} } - withName: GATK4_INTERVALLISTTOOLS { + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_INTERVALLISTTOOLS' { ext.args = {"--SUBDIVISION_MODE INTERVAL_COUNT --SCATTER_CONTENT ${params.scatter_content}"} } - withName: GATK4_DETERMINEGERMLINECONTIGPLOIDY { + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_DETERMINEGERMLINECONTIGPLOIDY' { ext.args = {"--imr OVERLAPPING_ONLY"} publishDir = [ mode: params.publish_dir_mode, @@ -58,7 +58,7 @@ process { ] } - withName: GATK4_GERMLINECNVCALLER { + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_GERMLINECNVCALLER' { ext.args = {"--imr OVERLAPPING_ONLY --run-mode COHORT"} publishDir = [ mode: params.publish_dir_mode, From badbcc9320b257f8ba5a1db07a3459b90cace0ec Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 8 Sep 2023 22:02:06 +0200 Subject: [PATCH 037/234] update bam index generation logic --- .../local/germlinecnvcaller_cohort.nf | 21 +++++++++++++++---- workflows/createpanelrefs.nf | 8 +++---- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/subworkflows/local/germlinecnvcaller_cohort.nf 
b/subworkflows/local/germlinecnvcaller_cohort.nf index 558c2d2..5c15bba 100644 --- a/subworkflows/local/germlinecnvcaller_cohort.nf +++ b/subworkflows/local/germlinecnvcaller_cohort.nf @@ -11,8 +11,8 @@ include { SAMTOOLS_INDEX } from '../../modules/nf-core/samt workflow GERMLINECNVCALLER_COHORT { take: - ch_bam // channel: [mandatory] [ val(meta), [path(bam)] ] - ch_fasta // channel: [mandatory] [ val(meta), [path(fasta)] ] + ch_input // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] main: @@ -20,9 +20,22 @@ workflow GERMLINECNVCALLER_COHORT { ch_fai = SAMTOOLS_FAIDX (ch_fasta, [[:],[]]).fai ch_dict = PICARD_CREATESEQUENCEDICTIONARY (ch_fasta).reference_dict - ch_bai = SAMTOOLS_INDEX (ch_bam) - ch_bam_bai = ch_bam.join(SAMTOOLS_INDEX.out.bai) + ch_input + .branch { meta, bam, bai -> + bam_with_index: bai.size() > 0 + return [meta, bam, bai] + bam_without_index: bai.size() == 0 + return [meta, bam] + } + .set { ch_for_mix } + + SAMTOOLS_INDEX (ch_for_mix.bam_without_index) + + ch_bam_bai = ch_for_mix.bam_without_index + .join(SAMTOOLS_INDEX.out.bai) + .mix(ch_for_mix.bam_with_index) + .dump{"test $it"} GATK4_PREPROCESSINTERVALS (ch_fasta, ch_fai, diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index cf1f709..7b353a1 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -38,8 +38,8 @@ for (param in checkPathParamList) if (param) file(param, checkIfExists: true) ch_from_samplesheet = Channel.fromSamplesheet("input") ch_input = ch_from_samplesheet.map{meta, bam, bai, cram, crai -> - if (bam) return [ meta + [data_type:"bam"], bam ] - if (cram) return [ meta + [data_type:"cram" ], cram ] + if (bam) return [ meta + [data_type:"bam"], bam, bai ] + if (cram) return [ meta + [data_type:"cram"], cram, crai ] } // Initialize file channels based on params, defined in the params.genomes[params.genome] 
scope @@ -101,9 +101,9 @@ workflow CREATEPANELREFS { if (params.tools && params.tools.split(',').contains('cnvkit')) { ch_input - .map{ meta, bam -> + .map{ meta, align, index -> new_meta = meta + [id:"panel"] - [new_meta, bam] + [new_meta, align] } .groupTuple() .branch{ From 8286b59a40d6961654e5f6516a14f5303e5abaa9 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 8 Sep 2023 22:28:51 +0200 Subject: [PATCH 038/234] add fai and dict --- main.nf | 2 ++ nextflow_schema.json | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/main.nf b/main.nf index 9ab1fea..cb31f5e 100644 --- a/main.nf +++ b/main.nf @@ -17,6 +17,8 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.fai = WorkflowMain.getGenomeAttribute(params, 'fai') +params.dict = WorkflowMain.getGenomeAttribute(params, 'dict') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VALIDATE & PRINT PARAMETER SUMMARY diff --git a/nextflow_schema.json b/nextflow_schema.json index 8f15b48..b1752cb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -164,6 +164,24 @@ "fa_icon": "fas fa-ban", "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." 
+ }, + "dict": { + "type": "string", + "description": "Path to sequence dictionary file", + "pattern": "^\\\\S+\\\\.dict$", + "format": "file-path", + "fa_icon": "fas fa-file", + "exists": true, + "mimetype": "text/plain" + }, + "fai": { + "type": "string", + "description": "Path to fasta index file", + "pattern": "^\\\\S+\\\\.fn?a(sta)?\\\\.fai$", + "format": "file-path", + "fa_icon": "fas fa-file", + "exists": true, + "mimetype": "text/plain" } } }, From c6ebd04c6cdc163656d613b1231b45348c0547cf Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 8 Sep 2023 23:40:59 +0200 Subject: [PATCH 039/234] add dict and fai --- conf/modules/germlinecnvcaller.config | 8 ++++++++ nextflow_schema.json | 4 ++-- .../local/germlinecnvcaller_cohort.nf | 20 +++++++++++++++---- workflows/createpanelrefs.nf | 11 +++++++++- 4 files changed, 36 insertions(+), 7 deletions(-) diff --git a/conf/modules/germlinecnvcaller.config b/conf/modules/germlinecnvcaller.config index ff73a73..25fb04a 100644 --- a/conf/modules/germlinecnvcaller.config +++ b/conf/modules/germlinecnvcaller.config @@ -18,6 +18,14 @@ process { ] } + withName: '.*GERMLINECNVCALLER_COHORT:SAMTOOLS_FAIDX' { + ext.when = { params.fai.equals(null) } + } + + withName: '.*GERMLINECNVCALLER_COHORT:PICARD_CREATESEQUENCEDICTIONARY' { + ext.when = { params.dict.equals(null) } + } + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS' { ext.args = { ["--imr OVERLAPPING_ONLY", "--padding ${params.padding}", diff --git a/nextflow_schema.json b/nextflow_schema.json index b1752cb..1e05670 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -168,7 +168,7 @@ "dict": { "type": "string", "description": "Path to sequence dictionary file", - "pattern": "^\\\\S+\\\\.dict$", + "pattern": "^\\S+\\.dict$", "format": "file-path", "fa_icon": "fas fa-file", "exists": true, @@ -177,7 +177,7 @@ "fai": { "type": "string", "description": "Path to fasta index file", - 
"pattern": "^\\\\S+\\\\.fn?a(sta)?\\\\.fai$", + "pattern": "^\\S+\\.fn?a(sta)?\\.fai$", "format": "file-path", "fa_icon": "fas fa-file", "exists": true, diff --git a/subworkflows/local/germlinecnvcaller_cohort.nf b/subworkflows/local/germlinecnvcaller_cohort.nf index 5c15bba..a630211 100644 --- a/subworkflows/local/germlinecnvcaller_cohort.nf +++ b/subworkflows/local/germlinecnvcaller_cohort.nf @@ -11,15 +11,28 @@ include { SAMTOOLS_INDEX } from '../../modules/nf-core/samt workflow GERMLINECNVCALLER_COHORT { take: - ch_input // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_user_dict // channel: [mandatory] [ val(meta), path(dict) ] + ch_user_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_input // channel: [mandatory] [ val(meta), path(bam), path(bai) ] ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] main: ch_versions = Channel.empty() - ch_fai = SAMTOOLS_FAIDX (ch_fasta, [[:],[]]).fai - ch_dict = PICARD_CREATESEQUENCEDICTIONARY (ch_fasta).reference_dict + SAMTOOLS_FAIDX (ch_fasta, [[:],[]]) + + PICARD_CREATESEQUENCEDICTIONARY (ch_fasta) + + ch_user_dict + .mix(PICARD_CREATESEQUENCEDICTIONARY.out.reference_dict) + .collect() + .set { ch_dict } + + ch_user_fai + .mix(SAMTOOLS_FAIDX.out.fai) + .collect() + .set { ch_fai } ch_input .branch { meta, bam, bai -> @@ -35,7 +48,6 @@ workflow GERMLINECNVCALLER_COHORT { ch_bam_bai = ch_for_mix.bam_without_index .join(SAMTOOLS_INDEX.out.bai) .mix(ch_for_mix.bam_with_index) - .dump{"test $it"} GATK4_PREPROCESSINTERVALS (ch_fasta, ch_fai, diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 7b353a1..d6cc32e 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -43,6 +43,10 @@ ch_input = ch_from_samplesheet.map{meta, bam, bai, cram, crai -> } // Initialize file channels based on params, defined in the params.genomes[params.genome] scope +ch_dict = params.dict ? 
Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() + : Channel.empty() +ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() + : Channel.empty() ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() : Channel.empty() ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() @@ -119,7 +123,12 @@ workflow CREATEPANELREFS { if (params.tools && params.tools.split(',').contains('germlinecnvcaller')) { - GERMLINECNVCALLER_COHORT(ch_input, ch_fasta, ch_ploidy_priors) + GERMLINECNVCALLER_COHORT(ch_dict, + ch_fai, + ch_fasta, + ch_input, + ch_ploidy_priors) + ch_versions = ch_versions.mix(GERMLINECNVCALLER_COHORT.out.versions) } From ed59800adbc989132f615ce8b22a8abe4f068e48 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 9 Sep 2023 09:14:45 +0200 Subject: [PATCH 040/234] update documentation --- CITATIONS.md | 4 ++ README.md | 21 ++++++--- conf/modules/germlinecnvcaller.config | 10 +++++ docs/output.md | 22 ++++++++- docs/usage.md | 64 +++++++++++++-------------- 5 files changed, 81 insertions(+), 40 deletions(-) diff --git a/CITATIONS.md b/CITATIONS.md index 7d9a92a..a50d9e5 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -14,6 +14,10 @@ > Talevich E, Shain AH, Botton T, Bastian BC (2016) CNVkit: Genome-Wide Copy Number Detection and Visualization from Targeted DNA Sequencing. PLoS Comput Biol 12(4): e1004873. doi: 10.1371/journal.pcbi.1004873. PubMed PMID: 27100738. PubMed Central PMCID: PMC4839673. +- [GATK] (https://genome.cshlp.org/content/20/9/1297) + + > McKenna A, Hanna M, Banks E, et al. The Genome Analysis Toolkit: A MapReduce framework for analyzing next-generation DNA sequencing data. Genome Res. 2010;20(9):1297-1303. 
doi:10.1101/gr.107524.110 + - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. diff --git a/README.md b/README.md index 45e13c5..b823c61 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,8 @@ 1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) 2. Build Panel of Normals for [`CNVKIT`](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004873) -3. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +3. Build ploidy and cnv calling models for [`GATK's germlinecnvcaller workflow`](https://genome.cshlp.org/content/20/9/1297) +4. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) ## Usage @@ -34,14 +35,19 @@ First, prepare a samplesheet with your input data that looks as follows: `samplesheet.csv`: ```csv -sample,bam -sample1,sample1.bam -sample2,sample2.bam -sample3,sample3.bam -sample4,sample4.bam +sample,bam,bai,cram,crai +sample1,sample1.bam,sample1.bai,, +sample2,sample2.bam,,, +sample3,sample3.bam,sample3.bai,, +sample4,sample4.bam,,, ``` -Each row represents a bam file. +Each row in the samplesheet represents an alignment file, and it is important that you provide the files in the right format for the analysis you want to run. 
+ +| Tool | Alignment format | +| ----------------- | ---------------- | +| cnvkit | bam | +| germlinecnvcaller | bam | Now, you can run the pipeline using: @@ -49,6 +55,7 @@ Now, you can run the pipeline using: nextflow run nf-core/createpanelrefs \ -profile \ --input samplesheet.csv \ + --tools \ --genome GATK.GRCh38 \ --outdir ``` diff --git a/conf/modules/germlinecnvcaller.config b/conf/modules/germlinecnvcaller.config index 25fb04a..7e54ee4 100644 --- a/conf/modules/germlinecnvcaller.config +++ b/conf/modules/germlinecnvcaller.config @@ -20,10 +20,20 @@ process { withName: '.*GERMLINECNVCALLER_COHORT:SAMTOOLS_FAIDX' { ext.when = { params.fai.equals(null) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/germlinecnvcaller/references" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } withName: '.*GERMLINECNVCALLER_COHORT:PICARD_CREATESEQUENCEDICTIONARY' { ext.when = { params.dict.equals(null) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/germlinecnvcaller/references" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } withName: '.*GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS' { diff --git a/docs/output.md b/docs/output.md index b9d6a63..e2cf332 100644 --- a/docs/output.md +++ b/docs/output.md @@ -12,8 +12,8 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -- [FastQC](#fastqc) - Raw read QC - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline +- [GATK's germlinecnvcaller](#gatk-germlinecnvcaller) - Publish read counts, ploidy and cnvcalling models that can be used to call CNVs in case mode. 
- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution ### FastQC @@ -37,6 +37,26 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d > **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. +### GATK germlinecnvcaller + +
+Output files + +- `results/germlinecnvcaller/` + - `determinecontigploidy` + - `cohort-model`: Contig ploidy model. + - `germlinecnvcaller` + - `*_model`: CNV caller model for each scattered shard. + - `readcounts` + - `*.hdf5|.tsv`: Read count statistics for each sample. + - `references` + - `*.dict`: Sequence dictionary file. This file is not published if the user supplies it to the pipeline using the `--dict` parameter. + - `*.fai`: Fasta index file. This file is not published if the user supplies it to the pipeline using the `--fai` parameter. + +
+ +[GATK](https://github.com/broadinstitute/gatk) is a toolkit which offers a wide variety of tools with a primary focus on variant discovery and genotyping. In this pipeline we have implemented GATK's germlinecnvcalling workflow for analysing a cohort of samples. The output files generated from this analysis can be used for analysing samples in case mode. For more information about the workflow and output files, see GATK's documentation [here](https://gatk.broadinstitute.org/hc/en-us/articles/360035531152--How-to-Call-common-and-rare-germline-copy-number-variants). + ### MultiQC
diff --git a/docs/usage.md b/docs/usage.md index 359496c..1bc6a75 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,50 +6,35 @@ ## Introduction - - ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use the `--input` parameter to specify its location. It has to be a comma-separated file and recognizes the following fields as column headers. + +| Fields | Description | +| -------- | ------------------------------ | +| `sample` | Custom sample name. | +| `bam` | Alignment file in bam format. | +| `bai` | bam file index. | +| `cram` | Alignment file in cram format. | +| `crai` | cram file index. | ```bash --input '[path to samplesheet file]' ``` -### Multiple runs of the same sample - -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: - -```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz -``` - ### Full samplesheet -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. 
+The pipeline will auto-detect whether a sample is aligned in bam/cram format using the information provided in the samplesheet. The samplesheet can have either bam/cram files with or without their indices. -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. +A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 3 samples, where one sample, `SAMPLE_1` is missing its index file. ```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +sample,bam,bai +SAMPLE_1,sample1.bam, +SAMPLE_2,sample2.bam,sample2.bam.bai +SAMPLE_3,sample3.bam,sample3.bam.bai ``` -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | - An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. 
## Running the pipeline @@ -57,10 +42,12 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/createpanelrefs --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker +nextflow run nf-core/createpanelrefs --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker --tools cnvkit,germlinecnvcaller ``` -This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. +This will launch the pipeline with the `docker` configuration profile, and generate reference files necessary for cnvkit and germlinecnvcaller. To learn more about what tool options are recognized by the pipeline, check the pipeline's documentation on the [nf-core website](https://nf-co.re/createpanelrefs/dev/parameters/). + +See below for more information about profiles. Note that the pipeline will create the following files in your working directory: @@ -114,6 +101,19 @@ To further assist in reproducbility, you can use share and re-use [parameter fil > 💡 If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +## Workflow specific arguments + +### germlinecnvcaller + +If you are running the pipeline to generate references for GATK's germlinecnvcalling workflow, you should ensure that you have provided all the mandatory options specified in the table below. + +| Mandatory | Optional | +| ------------------------- | -------- | +| fasta/genomes | fai | +| ploidy_priors<sup>1</sup> | dict | + +<sup>1</sup> To learn more about this file, see [this comment](https://gatk.broadinstitute.org/hc/en-us/community/posts/360074399831/comments/13441240230299) on the GATK forum.
+ ## Core Nextflow arguments > **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). From 2d9ec3f7d52fbe87d3a7a9b3cc6c67c3fdcc261e Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 9 Sep 2023 23:02:07 +0200 Subject: [PATCH 041/234] enable processing a mix of bam and cram files --- .../local/germlinecnvcaller_cohort.nf | 89 +++++++++++-------- 1 file changed, 52 insertions(+), 37 deletions(-) diff --git a/subworkflows/local/germlinecnvcaller_cohort.nf b/subworkflows/local/germlinecnvcaller_cohort.nf index a630211..b797a8e 100644 --- a/subworkflows/local/germlinecnvcaller_cohort.nf +++ b/subworkflows/local/germlinecnvcaller_cohort.nf @@ -14,12 +14,15 @@ workflow GERMLINECNVCALLER_COHORT { ch_user_dict // channel: [mandatory] [ val(meta), path(dict) ] ch_user_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_input // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] main: ch_versions = Channel.empty() + // + // Prepare references + // SAMTOOLS_FAIDX (ch_fasta, [[:],[]]) PICARD_CREATESEQUENCEDICTIONARY (ch_fasta) @@ -34,30 +37,53 @@ workflow GERMLINECNVCALLER_COHORT { .collect() .set { ch_fai } - ch_input - .branch { meta, bam, bai -> - bam_with_index: bai.size() > 0 - return [meta, bam, bai] - bam_without_index: bai.size() == 0 - return [meta, bam] - } - .set { ch_for_mix } - - SAMTOOLS_INDEX (ch_for_mix.bam_without_index) - - ch_bam_bai = ch_for_mix.bam_without_index - .join(SAMTOOLS_INDEX.out.bai) - .mix(ch_for_mix.bam_with_index) - GATK4_PREPROCESSINTERVALS (ch_fasta, ch_fai, ch_dict, [[:],[]], [[:],[]]) - ch_bam_bai - .combine(GATK4_PREPROCESSINTERVALS.out.interval_list.map{it -> it[1]}) - .set {ch_readcounts_in} + GATK4_ANNOTATEINTERVALS 
(GATK4_PREPROCESSINTERVALS.out.interval_list, + ch_fasta, + ch_fai, + ch_dict, + [[:],[]], [[:],[]], [[:],[]], [[:],[]]) + GATK4_INTERVALLISTTOOLS(GATK4_FILTERINTERVALS.out.interval_list) + .interval_list + .map {meta, it -> it} + .flatten() + .set { ch_intervallist_out } + + // + // Filter out files that lack indices, and generate them + // + ch_input + .branch { meta, alignment, index -> + alignment_with_index: index.size() > 0 + return [meta, alignment, index] + alignment_without_index: index.size() == 0 + return [meta, alignment] + } + .set { ch_for_mix } + + SAMTOOLS_INDEX (ch_for_mix.alignment_without_index) + + SAMTOOLS_INDEX.out.bai + .mix(SAMTOOLS_INDEX.out.crai) + .set { ch_index } + + // + // Collect alignment files and their indices + // + ch_for_mix.alignment_without_index + .join(ch_index) + .mix(ch_for_mix.alignment_with_index) + .combine(GATK4_PREPROCESSINTERVALS.out.interval_list.map{it -> it[1]}) + .set {ch_readcounts_in} + + // + // Collect read counts + // GATK4_COLLECTREADCOUNTS (ch_readcounts_in, ch_fasta, ch_fai, @@ -69,36 +95,25 @@ workflow GERMLINECNVCALLER_COHORT { .map {tsv -> [[id:'cohort'],tsv]} .set { ch_readcounts_out } - GATK4_ANNOTATEINTERVALS (GATK4_PREPROCESSINTERVALS.out.interval_list, - ch_fasta, - ch_fai, - ch_dict, - [[:],[]], [[:],[]], [[:],[]], [[:],[]]) GATK4_FILTERINTERVALS (GATK4_PREPROCESSINTERVALS.out.interval_list, ch_readcounts_out, GATK4_ANNOTATEINTERVALS.out.annotated_intervals) - GATK4_INTERVALLISTTOOLS(GATK4_FILTERINTERVALS.out.interval_list) - .interval_list - .map {meta, it -> it} - .flatten() - .set { ch_intervallist_out } - ch_readcounts_out - .combine(GATK4_FILTERINTERVALS.out.interval_list) - .map{ meta, counts, meta2, il -> [meta, counts, il, []] } - .set {ch_contigploidy_in} + .combine(GATK4_FILTERINTERVALS.out.interval_list) + .map{ meta, counts, meta2, il -> [meta, counts, il, []] } + .set {ch_contigploidy_in} GATK4_DETERMINEGERMLINECONTIGPLOIDY (ch_contigploidy_in, [[:],[]], ch_ploidy_priors) 
ch_readcounts_out - .combine(ch_intervallist_out) - .combine(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.calls) - .map{ meta, counts, il, meta2, calls -> [meta + [id:il.baseName], counts, il, calls, []] } - .set {ch_cnvcaller_in} + .combine(ch_intervallist_out) + .combine(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.calls) + .map{ meta, counts, il, meta2, calls -> [meta + [id:il.baseName], counts, il, calls, []] } + .set {ch_cnvcaller_in} GATK4_GERMLINECNVCALLER (ch_cnvcaller_in) From afab318e0a2257d04f884cb311be9d87fb18c91c Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 9 Sep 2023 23:09:10 +0200 Subject: [PATCH 042/234] update comment --- subworkflows/local/germlinecnvcaller_cohort.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/germlinecnvcaller_cohort.nf b/subworkflows/local/germlinecnvcaller_cohort.nf index b797a8e..29607c3 100644 --- a/subworkflows/local/germlinecnvcaller_cohort.nf +++ b/subworkflows/local/germlinecnvcaller_cohort.nf @@ -82,7 +82,7 @@ workflow GERMLINECNVCALLER_COHORT { .set {ch_readcounts_in} // - // Collect read counts + // Collect read counts, and generate models // GATK4_COLLECTREADCOUNTS (ch_readcounts_in, ch_fasta, From 46693cc05e162de53ee6f8520f8b1b02ecce7ee3 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 11 Sep 2023 01:22:48 +0200 Subject: [PATCH 043/234] add test --- .../local/germlinecnvcaller_cohort.nf | 12 +- tests/config/tags.yml | 2 + .../local/germlinecnvcaller_cohort.nf.test | 52 +++++++ .../germlinecnvcaller_cohort.nf.test.snap | 131 ++++++++++++++++++ .../germlinecnvcaller_software_versions.yaml | 20 +++ 5 files changed, 211 insertions(+), 6 deletions(-) create mode 100644 tests/pipeline/subworkflows/local/germlinecnvcaller_cohort.nf.test create mode 100644 tests/pipeline/subworkflows/local/germlinecnvcaller_cohort.nf.test.snap create mode 100644 
tests/test_assets/germlinecnvcaller_software_versions.yaml diff --git a/subworkflows/local/germlinecnvcaller_cohort.nf b/subworkflows/local/germlinecnvcaller_cohort.nf index 29607c3..9d26d10 100644 --- a/subworkflows/local/germlinecnvcaller_cohort.nf +++ b/subworkflows/local/germlinecnvcaller_cohort.nf @@ -48,12 +48,6 @@ workflow GERMLINECNVCALLER_COHORT { ch_dict, [[:],[]], [[:],[]], [[:],[]], [[:],[]]) - GATK4_INTERVALLISTTOOLS(GATK4_FILTERINTERVALS.out.interval_list) - .interval_list - .map {meta, it -> it} - .flatten() - .set { ch_intervallist_out } - // // Filter out files that lack indices, and generate them // @@ -100,6 +94,12 @@ workflow GERMLINECNVCALLER_COHORT { ch_readcounts_out, GATK4_ANNOTATEINTERVALS.out.annotated_intervals) + GATK4_INTERVALLISTTOOLS(GATK4_FILTERINTERVALS.out.interval_list) + .interval_list + .map {meta, it -> it} + .flatten() + .set { ch_intervallist_out } + ch_readcounts_out .combine(GATK4_FILTERINTERVALS.out.interval_list) .map{ meta, counts, meta2, il -> [meta, counts, il, []] } diff --git a/tests/config/tags.yml b/tests/config/tags.yml index 7e9962d..2ff6812 100644 --- a/tests/config/tags.yml +++ b/tests/config/tags.yml @@ -12,3 +12,5 @@ cnvkit: - conf/modules/cnvkit.config - modules/nf-core/cnvkit/batch/main.nf - tests/cnvkit.nf.test + +germlinecnvcaller: diff --git a/tests/pipeline/subworkflows/local/germlinecnvcaller_cohort.nf.test b/tests/pipeline/subworkflows/local/germlinecnvcaller_cohort.nf.test new file mode 100644 index 0000000..101e685 --- /dev/null +++ b/tests/pipeline/subworkflows/local/germlinecnvcaller_cohort.nf.test @@ -0,0 +1,52 @@ +nextflow_workflow { + + name "Test Workflow GERMLINECNVCALLER_COHORT" + script "subworkflows/local/germlinecnvcaller_cohort.nf" + workflow "GERMLINECNVCALLER_COHORT" + tag "germlinecnvcaller" + + test("Run germlinecnvcaller test") { + + when { + params { + outdir = "$outputDir" + tools = 'germlinecnvcaller' + scatter_content = 2 + fasta = 
"https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" + ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" + } + workflow { + """ + input[0] = Channel.empty() + input[1] = Channel.empty() + input[2] = Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() + input[3] = Channel.of( + [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"),[]], + [[ id:'test2' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam"),[]]) + input[4] = Channel.fromPath(params.ploidy_priors) + """ + } + } + + then { + assert workflow.success + assert workflow.trace.succeeded().size() == 13 + assert workflow.trace.failed().size() == 0 + + assert snapshot ( + path("$outputDir/germlinecnvcaller/").list() + ).match("germlinecnvcaller") + + def expected = path("$baseDir/tests/test_assets/germlinecnvcaller_software_versions.yaml").yaml.collect() + def observed_list = [] + def observed = workflow.out.versions.collect {f -> path(f).yaml.entrySet()} + observed.stream() + .forEach(observed_list::addAll) + + assertContainsInAnyOrder(observed_list, expected) + + } + + } + +} diff --git a/tests/pipeline/subworkflows/local/germlinecnvcaller_cohort.nf.test.snap b/tests/pipeline/subworkflows/local/germlinecnvcaller_cohort.nf.test.snap new file mode 100644 index 0000000..85727a9 --- /dev/null +++ b/tests/pipeline/subworkflows/local/germlinecnvcaller_cohort.nf.test.snap @@ -0,0 +1,131 @@ +{ + "germlinecnvcaller": { + "content": [ + [ + [ + "contig_ploidy_prior.tsv:md5,7a2f5444b09a1f635a540bbcd23176cf", + "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", + "interval_list.tsv:md5,8c5aaf57cf34ff35b183178a87a9f864", + 
"mu_mean_bias_j_lowerbound__.tsv:md5,b617c6034a52b4d54911a931bdf45289", + "mu_psi_j_log__.tsv:md5,3e32071c22f02eea9d331f4e6a64e205", + "ploidy_config.json:md5,07e9bdbb9ddfa2d650cdf2c1c19f7fd5", + "std_mean_bias_j_lowerbound__.tsv:md5,6467bec3b9792301e4440bb79ff3fdb2", + "std_psi_j_log__.tsv:md5,754392b7852456cca3ab17222360d48d" + ] + ], + [ + [ + [ + [ + "baseline_copy_number_t.tsv:md5,a8f848d75d241f7932ffdca261bffe7b", + "log_c_emission_tc.tsv:md5,24084fb5e7435a6df43ae75320b0294b", + "log_q_c_tc.tsv:md5,9e915b56ff5d856e6c55dd8e292bb41b", + "mu_denoised_copy_ratio_t.tsv:md5,1f7fc6a88919cca7b6be29665675ef88", + "mu_psi_s_log__.tsv:md5,02545858ccda02a536dc8654c13ed059", + "mu_read_depth_s_log__.tsv:md5,abba1ee920d26594c24a892ed91184e1", + "mu_z_su.tsv:md5,976d7d25d98f6a250ef1a401ce0916c0", + "sample_name.txt:md5,5cf41871eea16c0fbfdcb7f0f664ed95", + "std_denoised_copy_ratio_t.tsv:md5,3655e2309189eda19aa70ecba1539ef2", + "std_psi_s_log__.tsv:md5,6462430dd67574ab7b6975a0b43a3a7a", + "std_read_depth_s_log__.tsv:md5,fd057bd69eea55734b86bb74a05950ab", + "std_z_su.tsv:md5,6642c5bdc1293753f47c17de3fba7f9e" + ], + [ + "baseline_copy_number_t.tsv:md5,51670372d61cf525a2303d293129fc2e", + "log_c_emission_tc.tsv:md5,7b8e68ac6f9e8a7e95dbf79ecf0c803d", + "log_q_c_tc.tsv:md5,6f725f21915557470ade6958922caefc", + "mu_denoised_copy_ratio_t.tsv:md5,d72fcfbfa748805bfc3ea4ec5806ee17", + "mu_psi_s_log__.tsv:md5,b8f8b66afeecce0c49dbde1291c23298", + "mu_read_depth_s_log__.tsv:md5,44eaed835a12482e187d29cb394ee9cf", + "mu_z_su.tsv:md5,14f01b246be1feddb1dc1a2fc1aaaa09", + "sample_name.txt:md5,640a474d6f73b01524287f50ca538c04", + "std_denoised_copy_ratio_t.tsv:md5,a9f76dfc48536f77eb2f2eb9e478bc85", + "std_psi_s_log__.tsv:md5,094d83cd78b0e95721f382b928a4114d", + "std_read_depth_s_log__.tsv:md5,64aa16b9f9e229c5dc8b3e9b436aee2c", + "std_z_su.tsv:md5,4361cc1c87931183100b807489b5607f" + ], + "calling_config.json:md5,6d65995da6a73fcfc7e02df572837921", + 
"denoising_config.json:md5,e81ec25c546367270e508fad267b9b06", + "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", + "interval_list.tsv:md5,0cba6efc5fa465f7930c901916a3e764" + ], + [ + "calling_config.json:md5,6d65995da6a73fcfc7e02df572837921", + "denoising_config.json:md5,e81ec25c546367270e508fad267b9b06", + "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", + "interval_list.tsv:md5,0cba6efc5fa465f7930c901916a3e764", + "log_q_tau_tk.tsv:md5,cc0465cab0f470449ce6491f2cb84b4d", + "mu_W_tu.tsv:md5,d505e9d819d0891c66a1fae49f18a970", + "mu_ard_u_log__.tsv:md5,3c2320b9d85073e9f04482ef666220ff", + "mu_log_mean_bias_t.tsv:md5,49d8ebde24d59b6ab7308b124d71fa1a", + "mu_psi_t_log__.tsv:md5,64fa5391ecab3600f583c5265b241c1f", + "std_W_tu.tsv:md5,b81ca0b36e880bd0d24f9342891ef5f8", + "std_ard_u_log__.tsv:md5,0ee1ae8a114874d066f30fefbb468ad6", + "std_log_mean_bias_t.tsv:md5,b7d9d5ed45771d8f24bd1f3d6dac5d3f", + "std_psi_t_log__.tsv:md5,845c6ff386e52cfd09f19566888337f9" + ] + ], + [ + [ + [ + "baseline_copy_number_t.tsv:md5,a8f848d75d241f7932ffdca261bffe7b", + "log_c_emission_tc.tsv:md5,8f637945b33995ac807bb26a9e7b93b5", + "log_q_c_tc.tsv:md5,e57cd9043632cbed3b165dda2dff8199", + "mu_denoised_copy_ratio_t.tsv:md5,6f8e05be3f60b7d7094ac255526e8a80", + "mu_psi_s_log__.tsv:md5,5341d83d3e0d4ffa44972c2beb74c1aa", + "mu_read_depth_s_log__.tsv:md5,dc661c73621ac754362ace917fdfeccb", + "mu_z_su.tsv:md5,bdda2270062b4c3a261e7e6302561ab1", + "sample_name.txt:md5,5cf41871eea16c0fbfdcb7f0f664ed95", + "std_denoised_copy_ratio_t.tsv:md5,eaca19d3488cd05353d71dc6d7c372cc", + "std_psi_s_log__.tsv:md5,3f05b40d319abf449eb0723b40bd0b8a", + "std_read_depth_s_log__.tsv:md5,fc1943b7d378f07d35bdbe4f4f9a3815", + "std_z_su.tsv:md5,9f12e0ea3b6cd2c51fd711d00ae75929" + ], + [ + "baseline_copy_number_t.tsv:md5,51670372d61cf525a2303d293129fc2e", + "log_c_emission_tc.tsv:md5,629ef6d8fde7d0654a7f38c624feae58", + "log_q_c_tc.tsv:md5,99f2f764d5925f43d9dd8b3597cf9722", + 
"mu_denoised_copy_ratio_t.tsv:md5,a622c8a2ce980a7075806b3ce1d72f43", + "mu_psi_s_log__.tsv:md5,868eb75d8539a30d3124ffbb77de7b66", + "mu_read_depth_s_log__.tsv:md5,dbc692cc904598701753ba2b7a6ee85a", + "mu_z_su.tsv:md5,07c5a47783ffd5017e75c9d02e8d4138", + "sample_name.txt:md5,640a474d6f73b01524287f50ca538c04", + "std_denoised_copy_ratio_t.tsv:md5,98c98ec41c3322351c9e2ea4affdd8bb", + "std_psi_s_log__.tsv:md5,08a7cd31719844e7abb324b4861ccde7", + "std_read_depth_s_log__.tsv:md5,2321ed4451d332dce569d5431927b296", + "std_z_su.tsv:md5,8f646acac9aeabc7e311342631963bca" + ], + "calling_config.json:md5,6d65995da6a73fcfc7e02df572837921", + "denoising_config.json:md5,e81ec25c546367270e508fad267b9b06", + "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", + "interval_list.tsv:md5,9f7718372e87e69aeb837678755a9971" + ], + [ + "calling_config.json:md5,6d65995da6a73fcfc7e02df572837921", + "denoising_config.json:md5,e81ec25c546367270e508fad267b9b06", + "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", + "interval_list.tsv:md5,9f7718372e87e69aeb837678755a9971", + "log_q_tau_tk.tsv:md5,0b7eef7c4e838a837309c4df7267ab93", + "mu_W_tu.tsv:md5,0ce68f6fbe7f89039918d864068a911e", + "mu_ard_u_log__.tsv:md5,6e0519d7b10d7ae79bec565c3bde38a4", + "mu_log_mean_bias_t.tsv:md5,c987052cb905076abc1f1a586b0fb89f", + "mu_psi_t_log__.tsv:md5,12fa83b402207dab6c835a8305087e45", + "std_W_tu.tsv:md5,f0f563f160af33e1522b157470f166e9", + "std_ard_u_log__.tsv:md5,9b51cd2bea9386068e643ff39ea20753", + "std_log_mean_bias_t.tsv:md5,768bf7f38caaa34543a6ba778dddc0cb", + "std_psi_t_log__.tsv:md5,4705f29d6828a5d28d0595164853d88a" + ] + ] + ], + [ + "test.hdf5:md5,c37b077e96885c62a4344a44345459c7", + "test2.hdf5:md5,098deec8a7c8b4550f7a5139d226d650" + ], + [ + "genome.dict:md5,aaba331e73d3cec0620b93e292b827b9", + "genome.fasta.fai:md5,3520cd30e1b100e55f578db9c855f685" + ] + ], + "timestamp": "2023-09-10T23:17:19+0000" + } +} \ No newline at end of file diff --git 
a/tests/test_assets/germlinecnvcaller_software_versions.yaml b/tests/test_assets/germlinecnvcaller_software_versions.yaml new file mode 100644 index 0000000..af8f7fb --- /dev/null +++ b/tests/test_assets/germlinecnvcaller_software_versions.yaml @@ -0,0 +1,20 @@ +"GERMLINECNVCALLER_COHORT:GATK4_FILTERINTERVALS": + gatk4: 4.4.0.0 +"GERMLINECNVCALLER_COHORT:GATK4_COLLECTREADCOUNTS": + gatk4: 4.4.0.0 +"GERMLINECNVCALLER_COHORT:GATK4_GERMLINECNVCALLER": + gatk4: 4.4.0.0 +"GERMLINECNVCALLER_COHORT:GATK4_DETERMINEGERMLINECONTIGPLOIDY": + gatk4: 4.4.0.0 +"GERMLINECNVCALLER_COHORT:GATK4_ANNOTATEINTERVALS": + gatk4: 4.4.0.0 +"GERMLINECNVCALLER_COHORT:GATK4_INTERVALLISTTOOLS": + gatk4: 4.4.0.0 +"GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS": + gatk4: 4.4.0.0 +"GERMLINECNVCALLER_COHORT:SAMTOOLS_FAIDX": + samtools: 1.17 +"GERMLINECNVCALLER_COHORT:SAMTOOLS_INDEX": + samtools: 1.17 +"GERMLINECNVCALLER_COHORT:PICARD_CREATESEQUENCEDICTIONARY": + picard: 3.0.0 From 9a7315efe0745f2375599d628e542cc0ac21317b Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 11 Sep 2023 01:28:02 +0200 Subject: [PATCH 044/234] update tags --- tests/config/tags.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/config/tags.yml b/tests/config/tags.yml index 2ff6812..31777b4 100644 --- a/tests/config/tags.yml +++ b/tests/config/tags.yml @@ -14,3 +14,6 @@ cnvkit: - tests/cnvkit.nf.test germlinecnvcaller: + - conf/modules/germlinecnvcaller.config + - subworkflows/local/germlinecnvcaller_cohort.nf + - tests/pipeline/subworkflows/local/germlinecnvcaller_cohort.nf.snap From 55b63df15ca9b3e24196048f84095b18693cbce9 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 11 Sep 2023 01:35:01 +0200 Subject: [PATCH 045/234] update configs --- conf/modules/germlinecnvcaller.config | 4 ++-- .../germlinecnvcaller_software_versions.yaml | 20 +++++++++---------- 2 files changed, 12 
insertions(+), 12 deletions(-) diff --git a/conf/modules/germlinecnvcaller.config b/conf/modules/germlinecnvcaller.config index 7e54ee4..b7204f5 100644 --- a/conf/modules/germlinecnvcaller.config +++ b/conf/modules/germlinecnvcaller.config @@ -38,8 +38,8 @@ process { withName: '.*GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS' { ext.args = { ["--imr OVERLAPPING_ONLY", - "--padding ${params.padding}", - "--bin-length ${params.bin_length}"].join(" ") + "--padding ${params.padding}", + "--bin-length ${params.bin_length}"].join(" ") } } diff --git a/tests/test_assets/germlinecnvcaller_software_versions.yaml b/tests/test_assets/germlinecnvcaller_software_versions.yaml index af8f7fb..f7e4521 100644 --- a/tests/test_assets/germlinecnvcaller_software_versions.yaml +++ b/tests/test_assets/germlinecnvcaller_software_versions.yaml @@ -1,20 +1,20 @@ "GERMLINECNVCALLER_COHORT:GATK4_FILTERINTERVALS": - gatk4: 4.4.0.0 + gatk4: 4.4.0.0 "GERMLINECNVCALLER_COHORT:GATK4_COLLECTREADCOUNTS": - gatk4: 4.4.0.0 + gatk4: 4.4.0.0 "GERMLINECNVCALLER_COHORT:GATK4_GERMLINECNVCALLER": - gatk4: 4.4.0.0 + gatk4: 4.4.0.0 "GERMLINECNVCALLER_COHORT:GATK4_DETERMINEGERMLINECONTIGPLOIDY": - gatk4: 4.4.0.0 + gatk4: 4.4.0.0 "GERMLINECNVCALLER_COHORT:GATK4_ANNOTATEINTERVALS": - gatk4: 4.4.0.0 + gatk4: 4.4.0.0 "GERMLINECNVCALLER_COHORT:GATK4_INTERVALLISTTOOLS": - gatk4: 4.4.0.0 + gatk4: 4.4.0.0 "GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS": - gatk4: 4.4.0.0 + gatk4: 4.4.0.0 "GERMLINECNVCALLER_COHORT:SAMTOOLS_FAIDX": - samtools: 1.17 + samtools: 1.17 "GERMLINECNVCALLER_COHORT:SAMTOOLS_INDEX": - samtools: 1.17 + samtools: 1.17 "GERMLINECNVCALLER_COHORT:PICARD_CREATESEQUENCEDICTIONARY": - picard: 3.0.0 + picard: 3.0.0 From 8c655de4190e47e9cfd770c472ae97bc690fd1c5 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 11 Sep 2023 01:48:58 +0200 Subject: [PATCH 046/234] update paths --- tests/config/tags.yml | 2 +- .../{subworkflows/local => 
}/germlinecnvcaller_cohort.nf.test | 0 .../local => }/germlinecnvcaller_cohort.nf.test.snap | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename tests/pipeline/{subworkflows/local => }/germlinecnvcaller_cohort.nf.test (100%) rename tests/pipeline/{subworkflows/local => }/germlinecnvcaller_cohort.nf.test.snap (100%) diff --git a/tests/config/tags.yml b/tests/config/tags.yml index 31777b4..f5380bd 100644 --- a/tests/config/tags.yml +++ b/tests/config/tags.yml @@ -16,4 +16,4 @@ cnvkit: germlinecnvcaller: - conf/modules/germlinecnvcaller.config - subworkflows/local/germlinecnvcaller_cohort.nf - - tests/pipeline/subworkflows/local/germlinecnvcaller_cohort.nf.snap + - tests/pipeline/germlinecnvcaller_cohort.nf.test diff --git a/tests/pipeline/subworkflows/local/germlinecnvcaller_cohort.nf.test b/tests/pipeline/germlinecnvcaller_cohort.nf.test similarity index 100% rename from tests/pipeline/subworkflows/local/germlinecnvcaller_cohort.nf.test rename to tests/pipeline/germlinecnvcaller_cohort.nf.test diff --git a/tests/pipeline/subworkflows/local/germlinecnvcaller_cohort.nf.test.snap b/tests/pipeline/germlinecnvcaller_cohort.nf.test.snap similarity index 100% rename from tests/pipeline/subworkflows/local/germlinecnvcaller_cohort.nf.test.snap rename to tests/pipeline/germlinecnvcaller_cohort.nf.test.snap From 830d9b447f2e64ce4ce4cb11c0548da8e7993bf6 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 11 Sep 2023 01:54:26 +0200 Subject: [PATCH 047/234] update tags.yaml --- ...rmlinecnvcaller.config => germlinecnvcaller_cohort.config} | 0 nextflow.config | 2 +- tests/config/tags.yml | 4 ++-- 3 files changed, 3 insertions(+), 3 deletions(-) rename conf/modules/{germlinecnvcaller.config => germlinecnvcaller_cohort.config} (100%) diff --git a/conf/modules/germlinecnvcaller.config b/conf/modules/germlinecnvcaller_cohort.config similarity index 100% rename from conf/modules/germlinecnvcaller.config rename to 
conf/modules/germlinecnvcaller_cohort.config diff --git a/nextflow.config b/nextflow.config index 32e37c2..7956a66 100644 --- a/nextflow.config +++ b/nextflow.config @@ -244,7 +244,7 @@ manifest { // Load modules.config for DSL2 module specific options includeConfig 'conf/modules/base.config' includeConfig 'conf/modules/cnvkit.config' -includeConfig 'conf/modules/germlinecnvcaller.config' +includeConfig 'conf/modules/germlinecnvcaller_cohort.config' // Function to ensure that resource requirements don't go beyond // a maximum limit diff --git a/tests/config/tags.yml b/tests/config/tags.yml index f5380bd..a3dbda1 100644 --- a/tests/config/tags.yml +++ b/tests/config/tags.yml @@ -13,7 +13,7 @@ cnvkit: - modules/nf-core/cnvkit/batch/main.nf - tests/cnvkit.nf.test -germlinecnvcaller: - - conf/modules/germlinecnvcaller.config +germlinecnvcaller_cohort: + - conf/modules/germlinecnvcaller_cohort.config - subworkflows/local/germlinecnvcaller_cohort.nf - tests/pipeline/germlinecnvcaller_cohort.nf.test From 545b807795b2741f9fbf326ad50e64bab0306fa1 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 11 Sep 2023 04:28:20 +0200 Subject: [PATCH 048/234] update test --- .../pipeline/germlinecnvcaller_cohort.nf.test | 5 +- .../germlinecnvcaller_cohort.nf.test.snap | 131 ------------------ 2 files changed, 2 insertions(+), 134 deletions(-) delete mode 100644 tests/pipeline/germlinecnvcaller_cohort.nf.test.snap diff --git a/tests/pipeline/germlinecnvcaller_cohort.nf.test b/tests/pipeline/germlinecnvcaller_cohort.nf.test index 101e685..2fd575e 100644 --- a/tests/pipeline/germlinecnvcaller_cohort.nf.test +++ b/tests/pipeline/germlinecnvcaller_cohort.nf.test @@ -33,9 +33,8 @@ nextflow_workflow { assert workflow.trace.succeeded().size() == 13 assert workflow.trace.failed().size() == 0 - assert snapshot ( - path("$outputDir/germlinecnvcaller/").list() - ).match("germlinecnvcaller") + FileFilter filter = file -> file.isFile() + 
assert path("$outputDir").toFile().listFiles(filter).collect() == 94 def expected = path("$baseDir/tests/test_assets/germlinecnvcaller_software_versions.yaml").yaml.collect() def observed_list = [] diff --git a/tests/pipeline/germlinecnvcaller_cohort.nf.test.snap b/tests/pipeline/germlinecnvcaller_cohort.nf.test.snap deleted file mode 100644 index 85727a9..0000000 --- a/tests/pipeline/germlinecnvcaller_cohort.nf.test.snap +++ /dev/null @@ -1,131 +0,0 @@ -{ - "germlinecnvcaller": { - "content": [ - [ - [ - "contig_ploidy_prior.tsv:md5,7a2f5444b09a1f635a540bbcd23176cf", - "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", - "interval_list.tsv:md5,8c5aaf57cf34ff35b183178a87a9f864", - "mu_mean_bias_j_lowerbound__.tsv:md5,b617c6034a52b4d54911a931bdf45289", - "mu_psi_j_log__.tsv:md5,3e32071c22f02eea9d331f4e6a64e205", - "ploidy_config.json:md5,07e9bdbb9ddfa2d650cdf2c1c19f7fd5", - "std_mean_bias_j_lowerbound__.tsv:md5,6467bec3b9792301e4440bb79ff3fdb2", - "std_psi_j_log__.tsv:md5,754392b7852456cca3ab17222360d48d" - ] - ], - [ - [ - [ - [ - "baseline_copy_number_t.tsv:md5,a8f848d75d241f7932ffdca261bffe7b", - "log_c_emission_tc.tsv:md5,24084fb5e7435a6df43ae75320b0294b", - "log_q_c_tc.tsv:md5,9e915b56ff5d856e6c55dd8e292bb41b", - "mu_denoised_copy_ratio_t.tsv:md5,1f7fc6a88919cca7b6be29665675ef88", - "mu_psi_s_log__.tsv:md5,02545858ccda02a536dc8654c13ed059", - "mu_read_depth_s_log__.tsv:md5,abba1ee920d26594c24a892ed91184e1", - "mu_z_su.tsv:md5,976d7d25d98f6a250ef1a401ce0916c0", - "sample_name.txt:md5,5cf41871eea16c0fbfdcb7f0f664ed95", - "std_denoised_copy_ratio_t.tsv:md5,3655e2309189eda19aa70ecba1539ef2", - "std_psi_s_log__.tsv:md5,6462430dd67574ab7b6975a0b43a3a7a", - "std_read_depth_s_log__.tsv:md5,fd057bd69eea55734b86bb74a05950ab", - "std_z_su.tsv:md5,6642c5bdc1293753f47c17de3fba7f9e" - ], - [ - "baseline_copy_number_t.tsv:md5,51670372d61cf525a2303d293129fc2e", - "log_c_emission_tc.tsv:md5,7b8e68ac6f9e8a7e95dbf79ecf0c803d", - 
"log_q_c_tc.tsv:md5,6f725f21915557470ade6958922caefc", - "mu_denoised_copy_ratio_t.tsv:md5,d72fcfbfa748805bfc3ea4ec5806ee17", - "mu_psi_s_log__.tsv:md5,b8f8b66afeecce0c49dbde1291c23298", - "mu_read_depth_s_log__.tsv:md5,44eaed835a12482e187d29cb394ee9cf", - "mu_z_su.tsv:md5,14f01b246be1feddb1dc1a2fc1aaaa09", - "sample_name.txt:md5,640a474d6f73b01524287f50ca538c04", - "std_denoised_copy_ratio_t.tsv:md5,a9f76dfc48536f77eb2f2eb9e478bc85", - "std_psi_s_log__.tsv:md5,094d83cd78b0e95721f382b928a4114d", - "std_read_depth_s_log__.tsv:md5,64aa16b9f9e229c5dc8b3e9b436aee2c", - "std_z_su.tsv:md5,4361cc1c87931183100b807489b5607f" - ], - "calling_config.json:md5,6d65995da6a73fcfc7e02df572837921", - "denoising_config.json:md5,e81ec25c546367270e508fad267b9b06", - "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", - "interval_list.tsv:md5,0cba6efc5fa465f7930c901916a3e764" - ], - [ - "calling_config.json:md5,6d65995da6a73fcfc7e02df572837921", - "denoising_config.json:md5,e81ec25c546367270e508fad267b9b06", - "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", - "interval_list.tsv:md5,0cba6efc5fa465f7930c901916a3e764", - "log_q_tau_tk.tsv:md5,cc0465cab0f470449ce6491f2cb84b4d", - "mu_W_tu.tsv:md5,d505e9d819d0891c66a1fae49f18a970", - "mu_ard_u_log__.tsv:md5,3c2320b9d85073e9f04482ef666220ff", - "mu_log_mean_bias_t.tsv:md5,49d8ebde24d59b6ab7308b124d71fa1a", - "mu_psi_t_log__.tsv:md5,64fa5391ecab3600f583c5265b241c1f", - "std_W_tu.tsv:md5,b81ca0b36e880bd0d24f9342891ef5f8", - "std_ard_u_log__.tsv:md5,0ee1ae8a114874d066f30fefbb468ad6", - "std_log_mean_bias_t.tsv:md5,b7d9d5ed45771d8f24bd1f3d6dac5d3f", - "std_psi_t_log__.tsv:md5,845c6ff386e52cfd09f19566888337f9" - ] - ], - [ - [ - [ - "baseline_copy_number_t.tsv:md5,a8f848d75d241f7932ffdca261bffe7b", - "log_c_emission_tc.tsv:md5,8f637945b33995ac807bb26a9e7b93b5", - "log_q_c_tc.tsv:md5,e57cd9043632cbed3b165dda2dff8199", - "mu_denoised_copy_ratio_t.tsv:md5,6f8e05be3f60b7d7094ac255526e8a80", - 
"mu_psi_s_log__.tsv:md5,5341d83d3e0d4ffa44972c2beb74c1aa", - "mu_read_depth_s_log__.tsv:md5,dc661c73621ac754362ace917fdfeccb", - "mu_z_su.tsv:md5,bdda2270062b4c3a261e7e6302561ab1", - "sample_name.txt:md5,5cf41871eea16c0fbfdcb7f0f664ed95", - "std_denoised_copy_ratio_t.tsv:md5,eaca19d3488cd05353d71dc6d7c372cc", - "std_psi_s_log__.tsv:md5,3f05b40d319abf449eb0723b40bd0b8a", - "std_read_depth_s_log__.tsv:md5,fc1943b7d378f07d35bdbe4f4f9a3815", - "std_z_su.tsv:md5,9f12e0ea3b6cd2c51fd711d00ae75929" - ], - [ - "baseline_copy_number_t.tsv:md5,51670372d61cf525a2303d293129fc2e", - "log_c_emission_tc.tsv:md5,629ef6d8fde7d0654a7f38c624feae58", - "log_q_c_tc.tsv:md5,99f2f764d5925f43d9dd8b3597cf9722", - "mu_denoised_copy_ratio_t.tsv:md5,a622c8a2ce980a7075806b3ce1d72f43", - "mu_psi_s_log__.tsv:md5,868eb75d8539a30d3124ffbb77de7b66", - "mu_read_depth_s_log__.tsv:md5,dbc692cc904598701753ba2b7a6ee85a", - "mu_z_su.tsv:md5,07c5a47783ffd5017e75c9d02e8d4138", - "sample_name.txt:md5,640a474d6f73b01524287f50ca538c04", - "std_denoised_copy_ratio_t.tsv:md5,98c98ec41c3322351c9e2ea4affdd8bb", - "std_psi_s_log__.tsv:md5,08a7cd31719844e7abb324b4861ccde7", - "std_read_depth_s_log__.tsv:md5,2321ed4451d332dce569d5431927b296", - "std_z_su.tsv:md5,8f646acac9aeabc7e311342631963bca" - ], - "calling_config.json:md5,6d65995da6a73fcfc7e02df572837921", - "denoising_config.json:md5,e81ec25c546367270e508fad267b9b06", - "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", - "interval_list.tsv:md5,9f7718372e87e69aeb837678755a9971" - ], - [ - "calling_config.json:md5,6d65995da6a73fcfc7e02df572837921", - "denoising_config.json:md5,e81ec25c546367270e508fad267b9b06", - "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", - "interval_list.tsv:md5,9f7718372e87e69aeb837678755a9971", - "log_q_tau_tk.tsv:md5,0b7eef7c4e838a837309c4df7267ab93", - "mu_W_tu.tsv:md5,0ce68f6fbe7f89039918d864068a911e", - "mu_ard_u_log__.tsv:md5,6e0519d7b10d7ae79bec565c3bde38a4", - 
"mu_log_mean_bias_t.tsv:md5,c987052cb905076abc1f1a586b0fb89f", - "mu_psi_t_log__.tsv:md5,12fa83b402207dab6c835a8305087e45", - "std_W_tu.tsv:md5,f0f563f160af33e1522b157470f166e9", - "std_ard_u_log__.tsv:md5,9b51cd2bea9386068e643ff39ea20753", - "std_log_mean_bias_t.tsv:md5,768bf7f38caaa34543a6ba778dddc0cb", - "std_psi_t_log__.tsv:md5,4705f29d6828a5d28d0595164853d88a" - ] - ] - ], - [ - "test.hdf5:md5,c37b077e96885c62a4344a44345459c7", - "test2.hdf5:md5,098deec8a7c8b4550f7a5139d226d650" - ], - [ - "genome.dict:md5,aaba331e73d3cec0620b93e292b827b9", - "genome.fasta.fai:md5,3520cd30e1b100e55f578db9c855f685" - ] - ], - "timestamp": "2023-09-10T23:17:19+0000" - } -} \ No newline at end of file From 1e5535c8b3b74447905d2f371a81758ef87871e9 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 11 Sep 2023 04:43:02 +0200 Subject: [PATCH 049/234] change collect to size --- tests/pipeline/germlinecnvcaller_cohort.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipeline/germlinecnvcaller_cohort.nf.test b/tests/pipeline/germlinecnvcaller_cohort.nf.test index 2fd575e..b3e963b 100644 --- a/tests/pipeline/germlinecnvcaller_cohort.nf.test +++ b/tests/pipeline/germlinecnvcaller_cohort.nf.test @@ -34,7 +34,7 @@ nextflow_workflow { assert workflow.trace.failed().size() == 0 FileFilter filter = file -> file.isFile() - assert path("$outputDir").toFile().listFiles(filter).collect() == 94 + assert path("$outputDir").toFile().listFiles(filter).size() == 94 def expected = path("$baseDir/tests/test_assets/germlinecnvcaller_software_versions.yaml").yaml.collect() def observed_list = [] From c38c3093e10e34637fc0882d4c2471ab97eac464 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 11 Sep 2023 17:04:54 +0200 Subject: [PATCH 050/234] update test --- tests/pipeline/germlinecnvcaller_cohort.nf.test | 6 ++++-- 1 file changed, 4 insertions(+), 2 
deletions(-) diff --git a/tests/pipeline/germlinecnvcaller_cohort.nf.test b/tests/pipeline/germlinecnvcaller_cohort.nf.test index b3e963b..b2087ac 100644 --- a/tests/pipeline/germlinecnvcaller_cohort.nf.test +++ b/tests/pipeline/germlinecnvcaller_cohort.nf.test @@ -33,8 +33,10 @@ nextflow_workflow { assert workflow.trace.succeeded().size() == 13 assert workflow.trace.failed().size() == 0 - FileFilter filter = file -> file.isFile() - assert path("$outputDir").toFile().listFiles(filter).size() == 94 + assert path("$outputDir/germlinecnvcaller/references/genome.dict").toFile().isFile() + assert path("$outputDir/germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model").toFile().isDirectory() + assert path("$outputDir/germlinecnvcaller/determinegermlinecontigploidy/cohort-model").toFile().isDirectory() + def expected = path("$baseDir/tests/test_assets/germlinecnvcaller_software_versions.yaml").yaml.collect() def observed_list = [] From 9101b6702abeeebde5bc8dfe66de223751a6011a Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 11 Sep 2023 17:17:19 +0200 Subject: [PATCH 051/234] update version check --- tests/pipeline/germlinecnvcaller_cohort.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipeline/germlinecnvcaller_cohort.nf.test b/tests/pipeline/germlinecnvcaller_cohort.nf.test index b2087ac..febc7da 100644 --- a/tests/pipeline/germlinecnvcaller_cohort.nf.test +++ b/tests/pipeline/germlinecnvcaller_cohort.nf.test @@ -44,7 +44,7 @@ nextflow_workflow { observed.stream() .forEach(observed_list::addAll) - assertContainsInAnyOrder(observed_list, expected) + assertContainsInAnyOrder(expected, observed_list) } From f13d2599243d4eff139833e3d4474cc3be919a58 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 11 Sep 2023 18:22:25 +0200 Subject: [PATCH 052/234] Update README.md --- README.md | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index b823c61..fe82766 100644 --- a/README.md +++ b/README.md @@ -44,10 +44,10 @@ sample4,sample4.bam,,, Each row in the samplesheet represents an alignment file, and it is important that you provide the files in the right format for the analysis you want to run. -| Tool | Alignment format | -| ----------------- | ---------------- | -| cnvkit | bam | -| germlinecnvcaller | bam | +| Tool | Alignment format | +| ----------------- | ---------------------------| +| cnvkit | bam | +| germlinecnvcaller | bam/cram/or a mix of both | Now, you can run the pipeline using: From 8e3ac33a5428a0dfdaab8bae530b1484bca90e8e Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 11 Sep 2023 18:22:57 +0200 Subject: [PATCH 053/234] Update README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fe82766..6fa9a86 100644 --- a/README.md +++ b/README.md @@ -44,10 +44,10 @@ sample4,sample4.bam,,, Each row in the samplesheet represents an alignment file, and it is important that you provide the files in the right format for the analysis you want to run. 
-| Tool | Alignment format | -| ----------------- | ---------------------------| -| cnvkit | bam | -| germlinecnvcaller | bam/cram/or a mix of both | +| Tool | Alignment format | +| ----------------- | ------------------------------| +| cnvkit | bam | +| germlinecnvcaller | bam or cram or a mix of both | Now, you can run the pipeline using: From 7c1cb444fc57f267bde36786ec45bd0590c8524f Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 11 Sep 2023 18:26:01 +0200 Subject: [PATCH 054/234] lint fix --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 6fa9a86..5e94056 100644 --- a/README.md +++ b/README.md @@ -44,10 +44,10 @@ sample4,sample4.bam,,, Each row in the samplesheet represents an alignment file, and it is important that you provide the files in the right format for the analysis you want to run. -| Tool | Alignment format | -| ----------------- | ------------------------------| -| cnvkit | bam | -| germlinecnvcaller | bam or cram or a mix of both | +| Tool | Alignment format | +| ----------------- | ---------------------------- | +| cnvkit | bam | +| germlinecnvcaller | bam or cram or a mix of both | Now, you can run the pipeline using: From 4572deacf7e913317897834f1e03e34f9ec7e2d0 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 11 Sep 2023 18:35:09 +0200 Subject: [PATCH 055/234] update emit --- subworkflows/local/germlinecnvcaller_cohort.nf | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/germlinecnvcaller_cohort.nf b/subworkflows/local/germlinecnvcaller_cohort.nf index 9d26d10..4e1b9ef 100644 --- a/subworkflows/local/germlinecnvcaller_cohort.nf +++ b/subworkflows/local/germlinecnvcaller_cohort.nf @@ -129,5 +129,8 @@ workflow GERMLINECNVCALLER_COHORT { ch_versions = ch_versions.mix(GATK4_GERMLINECNVCALLER.out.versions.first()) emit: - 
versions = ch_versions + cnvmodel = GATK4_GERMLINECNVCALLER.out.cohortmodel + ploidymodel = GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.model + readcounts = ch_readcounts_out + versions = ch_versions } From f461b1718bafd72667412b15539308a6682e6e15 Mon Sep 17 00:00:00 2001 From: Francesco L Date: Sat, 13 Jan 2024 14:52:43 +0100 Subject: [PATCH 056/234] added cnvkit_targets param --- nextflow.config | 3 +++ workflows/createpanelrefs.nf | 20 +++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/nextflow.config b/nextflow.config index 7956a66..75bd145 100644 --- a/nextflow.config +++ b/nextflow.config @@ -28,6 +28,9 @@ params { scatter_content = 5000 segmental_duplications = null + // CNVkit options + cnvkit_targets = null + // MultiQC options multiqc_config = null multiqc_title = null diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index d6cc32e..3e17b9b 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -43,14 +43,16 @@ ch_input = ch_from_samplesheet.map{meta, bam, bai, cram, crai -> } // Initialize file channels based on params, defined in the params.genomes[params.genome] scope -ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() - : Channel.empty() -ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() - : Channel.empty() -ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() - : Channel.empty() -ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() - : Channel.empty() +ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() + : Channel.empty() +ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() + : Channel.empty() +ch_fasta = params.fasta ? 
Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() + : Channel.empty() +ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() + : Channel.empty() +ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName],targets]}.collect() + : Channel.empty() /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -117,7 +119,7 @@ workflow CREATEPANELREFS { .map {meta, bam -> [ meta, [], bam ]} .set { ch_cnvkit_input } - CNVKIT_BATCH ( ch_cnvkit_input, ch_fasta, [[:],[]], [[:],[]], [[:],[]], true ) + CNVKIT_BATCH ( ch_cnvkit_input, ch_fasta, [[:],[]], ch_cnvkit_targets, [[:],[]], true ) ch_versions = ch_versions.mix(CNVKIT_BATCH.out.versions) } From de819a6651210bac0882d13bcc161014730d9bd3 Mon Sep 17 00:00:00 2001 From: Francesco L Date: Sat, 13 Jan 2024 14:58:11 +0100 Subject: [PATCH 057/234] updated schema --- nextflow_schema.json | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index 1e05670..dafa1b2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -66,6 +66,17 @@ } } }, + "cnvkit_options": { + "title": "CNVkit options", + "type": "object", + "description": "Options used by the cnvkit subworkflow", + "default": "", + "properties": { + "cnvkit_targets": { + "type": "string" + } + } + }, "input_output_options": { "title": "Input/output options", "type": "object", @@ -384,6 +395,9 @@ { "$ref": "#/definitions/germlinecnvcaller_options" }, + { + "$ref": "#/definitions/cnvkit_options" + }, { "$ref": "#/definitions/input_output_options" }, From d79415d13404ac967e7a39bbff3c9e2fa6af435f Mon Sep 17 00:00:00 2001 From: Francesco L Date: Sun, 14 Jan 2024 08:24:55 +0100 Subject: [PATCH 058/234] instead of empty channel pass dummy tuple --- workflows/createpanelrefs.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 3e17b9b..5105db9 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -52,7 +52,7 @@ ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() : Channel.empty() ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.empty() + : Channel.value([[:],[]]) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 8c61d4f2432f830499f0879b523aff100fcc3ad2 Mon Sep 17 00:00:00 2001 From: Francesco L Date: Sun, 14 Jan 2024 09:46:55 +0100 Subject: [PATCH 059/234] added usage doc for cnvkit targets --- docs/usage.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/usage.md b/docs/usage.md index 1bc6a75..829ae4e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -114,6 +114,27 @@ If you are running the pipeline to generate references for the GATK's germlinecn 1 To learn more about this file, see [this comment](https://gatk.broadinstitute.org/hc/en-us/community/posts/360074399831/comments/13441240230299) on GATK forum.
+ +### cnvkit + +If you are running the pipeline to generate references for the CNVkit variant calling workflow, you should consider that currently the default method for this pipeline is whole-genome. In order to use the CNVkit default, i.e. hybrid capture, when the user is creating a background for targeted capture sequencing (most commonly, exomes or panels), the user should + +1. provide an additional config file, in order to change or remove the method specified in the default `ext.args`, i.e. + +``` +process { + + withName: CNVKIT_BATCH { + ext.args = {"--output-reference ${meta.id}.cnn"} + } + +} +``` + +2. provide the `--cnvkit_target` parameter (optional) as a .bed file for the targets + + + ## Core Nextflow arguments > **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). From 95f2dafdcc0517cf3e1c4febe9132cce5293d252 Mon Sep 17 00:00:00 2001 From: Francesco L Date: Mon, 15 Jan 2024 14:00:13 +0100 Subject: [PATCH 060/234] prettier fixed --- docs/usage.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 829ae4e..91ff464 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -114,7 +114,6 @@ If you are running the pipeline to generate references for the GATK's germlinecn 1 To learn more about this file, see [this comment](https://gatk.broadinstitute.org/hc/en-us/community/posts/360074399831/comments/13441240230299) on GATK forum.
- ### cnvkit If you are running the pipeline to generate references for the CNVkit variant calling workflow, you should consider that currently the default method for this pipeline is whole-genome. In order to use the CNVkit default, i.e. hybrid capture, when the user is creating a background for targeted capture sequencing (most commonly, exomes or panels), the user should @@ -133,8 +132,6 @@ process { 2. provide the `--cnvkit_target` parameter (optional) as a .bed file for the targets - - ## Core Nextflow arguments > **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). From 1b29ca08b5a5d4ed2ac64c22d152cdf713643825 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Wed, 17 Jan 2024 09:01:25 +0000 Subject: [PATCH 061/234] [automated] Fix linting with Prettier --- .devcontainer/devcontainer.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4ecfbfe..4a9bc5c 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -18,11 +18,11 @@ "python.linting.flake8Path": "/opt/conda/bin/flake8", "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.linting.pylintPath": "/opt/conda/bin/pylint", }, // Add the IDs of extensions you want installed when the container is created. 
- "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] - } - } + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"], + }, + }, } From 8b9c80d2d6ca6aff37f2b2ebf4cc4fd221dc353a Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 17 Jan 2024 10:08:27 +0100 Subject: [PATCH 062/234] remove fastqc --- modules.json | 5 ----- 1 file changed, 5 deletions(-) diff --git a/modules.json b/modules.json index 85c9d9c..8abdadf 100644 --- a/modules.json +++ b/modules.json @@ -15,11 +15,6 @@ "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", "installed_by": ["modules"] }, - "fastqc": { - "branch": "master", - "git_sha": "617777a807a1770f73deb38c80004bac06807eef", - "installed_by": ["modules"] - }, "gatk4/annotateintervals": { "branch": "master", "git_sha": "42ae163c3c6eb23646189c30c07a889ad39c9b0e", From dffbc71b2cd0cb1daea1590e1bfea1fe327b8b0d Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 17 Jan 2024 10:33:05 +0100 Subject: [PATCH 063/234] update snap and remove fastqc --- modules/nf-core/fastqc/environment.yml | 7 -- modules/nf-core/fastqc/main.nf | 55 --------- modules/nf-core/fastqc/meta.yml | 57 --------- modules/nf-core/fastqc/tests/main.nf.test | 109 ------------------ .../nf-core/fastqc/tests/main.nf.test.snap | 10 -- modules/nf-core/fastqc/tests/tags.yml | 2 - tests/pipeline/cnvkit.nf.test.snap | 6 +- tests/pipeline/default.nf.test.snap | 6 +- 8 files changed, 6 insertions(+), 246 deletions(-) delete mode 100644 modules/nf-core/fastqc/environment.yml delete mode 100644 modules/nf-core/fastqc/main.nf delete mode 100644 modules/nf-core/fastqc/meta.yml delete mode 100644 modules/nf-core/fastqc/tests/main.nf.test delete mode 100644 modules/nf-core/fastqc/tests/main.nf.test.snap delete mode 100644 modules/nf-core/fastqc/tests/tags.yml diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml deleted file mode 100644 index 1787b38..0000000 --- 
a/modules/nf-core/fastqc/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: fastqc -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf deleted file mode 100644 index 9e19a74..0000000 --- a/modules/nf-core/fastqc/main.nf +++ /dev/null @@ -1,55 +0,0 @@ -process FASTQC { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : - 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // Make list of old name and new name pairs to use for renaming in the bash while loop - def old_new_pairs = reads instanceof Path || reads.size() == 1 ? 
[[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } - def rename_to = old_new_pairs*.join(' ').join(' ') - def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') - """ - printf "%s %s\\n" $rename_to | while read old_name new_name; do - [ -f "\${new_name}" ] || ln -s \$old_name \$new_name - done - - fastqc \\ - $args \\ - --threads $task.cpus \\ - $renamed_files - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.html - touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml deleted file mode 100644 index ee5507e..0000000 --- a/modules/nf-core/fastqc/meta.yml +++ /dev/null @@ -1,57 +0,0 @@ -name: fastqc -description: Run FastQC on sequenced reads -keywords: - - quality control - - qc - - adapters - - fastq -tools: - - fastqc: - description: | - FastQC gives general quality metrics about your reads. - It provides information about the quality score distribution - across your reads, the per base sequence content (%A/C/G/T). - You get information about adapter contamination and other - overrepresented sequences. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ - documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ - licence: ["GPL-2.0-only"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. 
-output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" - - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" -maintainers: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test deleted file mode 100644 index b9e8f92..0000000 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ /dev/null @@ -1,109 +0,0 @@ -nextflow_process { - - name "Test Process FASTQC" - script "../main.nf" - process "FASTQC" - tag "modules" - tag "modules_nfcore" - tag "fastqc" - - test("Single-Read") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = [ - [ id: 'test', single_end:true ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] - ] - """ - } - } - - then { - assertAll ( - { assert process.success }, - // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. - // looks like this:
Mon 2 Oct 2023
test.gz
- // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, - { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } - ) - } - } -// TODO -// // -// // Test with paired-end data -// // -// workflow test_fastqc_paired_end { -// input = [ -// [id: 'test', single_end: false], // meta map -// [ -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) -// ] -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with interleaved data -// // -// workflow test_fastqc_interleaved { -// input = [ -// [id: 'test', single_end: false], // meta map -// file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with bam data -// // -// workflow test_fastqc_bam { -// input = [ -// [id: 'test', single_end: false], // meta map -// file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with multiple samples -// // -// workflow test_fastqc_multiple { -// input = [ -// [id: 'test', single_end: false], // meta map -// [ -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) -// ] -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with custom prefix -// // -// workflow test_fastqc_custom_prefix { -// 
input = [ -// [ id:'mysample', single_end:true ], // meta map -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } -} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap deleted file mode 100644 index 636a32c..0000000 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ /dev/null @@ -1,10 +0,0 @@ -{ - "versions": { - "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] - ], - "timestamp": "2023-10-09T23:40:54+0000" - } -} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml deleted file mode 100644 index 7834294..0000000 --- a/modules/nf-core/fastqc/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -fastqc: - - modules/nf-core/fastqc/** diff --git a/tests/pipeline/cnvkit.nf.test.snap b/tests/pipeline/cnvkit.nf.test.snap index 7807a75..97975fc 100644 --- a/tests/pipeline/cnvkit.nf.test.snap +++ b/tests/pipeline/cnvkit.nf.test.snap @@ -1,9 +1,9 @@ { "software_versions": { "content": [ - "{CNVKIT_BATCH={cnvkit=0.9.10}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, Workflow={nf-core/createpanelrefs=1.0dev}}" + "{CNVKIT_BATCH={cnvkit=0.9.10}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.7, yaml=5.4.1}, Workflow={nf-core/createpanelrefs=1.0dev}}" ], - "timestamp": "2023-07-08T16:51:32+0000" + "timestamp": "2024-01-17T10:32:05.468312" }, "cnvkit": { "content": [ @@ -15,4 +15,4 @@ ], "timestamp": "2023-07-08T16:51:32+0000" } -} +} \ No newline at end of file diff --git a/tests/pipeline/default.nf.test.snap b/tests/pipeline/default.nf.test.snap index 807ac4d..4eb14f5 100644 --- a/tests/pipeline/default.nf.test.snap +++ b/tests/pipeline/default.nf.test.snap @@ -1,9 +1,9 @@ { "software_versions": { "content": [ - "{CNVKIT_BATCH={cnvkit=0.9.10}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, Workflow={nf-core/createpanelrefs=1.0dev}}" + 
"{CNVKIT_BATCH={cnvkit=0.9.10}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.7, yaml=5.4.1}, Workflow={nf-core/createpanelrefs=1.0dev}}" ], - "timestamp": "2023-07-08T16:47:57+0000" + "timestamp": "2024-01-17T10:30:57.668525" }, "cnvkit": { "content": [ @@ -15,4 +15,4 @@ ], "timestamp": "2023-07-08T16:47:57+0000" } -} +} \ No newline at end of file From 7d51ad2e54ab4124cc3a2e914f2ce4ddbdd74b30 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 17 Jan 2024 10:40:53 +0100 Subject: [PATCH 064/234] no commas --- .devcontainer/devcontainer.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4a9bc5c..4ecfbfe 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -18,11 +18,11 @@ "python.linting.flake8Path": "/opt/conda/bin/flake8", "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint", + "python.linting.pylintPath": "/opt/conda/bin/pylint" }, // Add the IDs of extensions you want installed when the container is created. 
- "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"], - }, - }, + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } } From 52c52d445b1a42de9715e0a23703f1d8052b1a14 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Wed, 17 Jan 2024 10:15:17 +0000 Subject: [PATCH 065/234] [automated] Fix linting with Prettier --- .devcontainer/devcontainer.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4ecfbfe..4a9bc5c 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -18,11 +18,11 @@ "python.linting.flake8Path": "/opt/conda/bin/flake8", "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.linting.pylintPath": "/opt/conda/bin/pylint", }, // Add the IDs of extensions you want installed when the container is created. 
- "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] - } - } + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"], + }, + }, } From 13ba7e1d8ef4a947bbedba2a7f157e55348c3cd1 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 26 Jan 2024 21:48:25 +0100 Subject: [PATCH 066/234] install createpon --- modules.json | 5 ++ .../environment.yml | 7 +++ .../createreadcountpanelofnormals/main.nf | 55 +++++++++++++++++++ .../createreadcountpanelofnormals/meta.yml | 45 +++++++++++++++ 4 files changed, 112 insertions(+) create mode 100644 modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml create mode 100644 modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf create mode 100644 modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml diff --git a/modules.json b/modules.json index 8abdadf..f37f8a0 100644 --- a/modules.json +++ b/modules.json @@ -25,6 +25,11 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "gatk4/createreadcountpanelofnormals": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, "gatk4/determinegermlinecontigploidy": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml b/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml new file mode 100644 index 0000000..ea5b9bf --- /dev/null +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_createreadcountpanelofnormals +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf b/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf new file mode 100644 index 
0000000..9d32a99 --- /dev/null +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf @@ -0,0 +1,55 @@ +process GATK4_CREATEREADCOUNTPANELOFNORMALS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(counts) + + output: + tuple val(meta), path("*.hdf5"), emit: pon + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_list = counts.collect(){"--input $it"}.join(" ") + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK CreateReadCountPanelOfNormals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CreateReadCountPanelOfNormals \\ + ${args} \\ + ${input_list} \\ + --output ${prefix}.hdf5 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.hdf5 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml b/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml new file mode 100644 index 0000000..ba01f63 --- /dev/null +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml @@ -0,0 +1,45 @@ +name: "gatk4_createreadcountpanelofnormals" +description: Creates a panel of normals (PoN) for read-count denoising given the read counts for samples in the panel. +keywords: + - createreadcountpanelofnormals + - gatk4 + - panelofnormals +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + tool_dev_url: "https://github.com/broadinstitute/gatk" + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - counts: + type: file + description: Read counts in hdf5 or tsv format. 
+ pattern: "*.{hdf5,tsv}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - pon: + type: file + description: Panel-of-normals file. + pattern: "*.{hdf5}" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" From 09999ba89eff2d7c8b2e0a3add0f759df4b79efa Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 27 Jan 2024 00:23:30 +0100 Subject: [PATCH 067/234] add gens --- conf/modules/gens_pon.config | 65 +++++++++++++ conf/modules/germlinecnvcaller_cohort.config | 6 +- conf/test.config | 4 +- docs/usage.md | 8 +- nextflow.config | 18 ++-- nextflow_schema.json | 41 ++++++-- subworkflows/local/gens_pon.nf | 94 +++++++++++++++++++ .../pipeline/germlinecnvcaller_cohort.nf.test | 2 +- workflows/createpanelrefs.nf | 31 ++++-- 9 files changed, 234 insertions(+), 35 deletions(-) create mode 100644 conf/modules/gens_pon.config create mode 100644 subworkflows/local/gens_pon.nf diff --git a/conf/modules/gens_pon.config b/conf/modules/gens_pon.config new file mode 100644 index 0000000..dea78a2 --- /dev/null +++ b/conf/modules/gens_pon.config @@ -0,0 +1,65 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+---------------------------------------------------------------------------------------- +*/ + +process { + + withName: '.*GENS_PON.*' { + publishDir = [ + enabled: false + ] + } + + withName: '.*GENS_PON:SAMTOOLS_FAIDX' { + ext.when = { params.fai.equals(null) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/gens_pon/references" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*GENS_PON:PICARD_CREATESEQUENCEDICTIONARY' { + ext.when = { params.dict.equals(null) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/gens_pon/references" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*GENS_PON:GATK4_PREPROCESSINTERVALS' { + ext.args = { ["--imr OVERLAPPING_ONLY", + "--bin-length ${params.gens_bin_length}"].join(" ") + } + } + + withName: '.*GENS_PON:GATK4_COLLECTREADCOUNTS' { + ext.args = {"--format ${params.readcount_format} --imr OVERLAPPING_ONLY"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/gens_pon/readcounts" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*GENS_PON:GATK4_CREATEREADCOUNTPANELOFNORMALS' { + ext.args = { ["--minimum-interval-median-percentile 10.0", + "--maximum-chunk-size 29349635"].join(" ")} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/gens_pon/createreadcountpanelofnormals" }, + pattern: "*-model", + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + +} diff --git a/conf/modules/germlinecnvcaller_cohort.config b/conf/modules/germlinecnvcaller_cohort.config index b7204f5..9e26e61 100644 --- a/conf/modules/germlinecnvcaller_cohort.config +++ b/conf/modules/germlinecnvcaller_cohort.config @@ -38,8 +38,8 @@ process { withName: '.*GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS' { ext.args = { ["--imr OVERLAPPING_ONLY", - "--padding ${params.padding}", - "--bin-length ${params.bin_length}"].join(" ") + "--padding ${params.gcnv_padding}", + "--bin-length ${params.gcnv_bin_length}"].join(" ") } } @@ -63,7 +63,7 @@ process { } withName: '.*GERMLINECNVCALLER_COHORT:GATK4_INTERVALLISTTOOLS' { - ext.args = {"--SUBDIVISION_MODE INTERVAL_COUNT --SCATTER_CONTENT ${params.scatter_content}"} + ext.args = {"--SUBDIVISION_MODE INTERVAL_COUNT --SCATTER_CONTENT ${params.gcnv_scatter_content}"} } withName: '.*GERMLINECNVCALLER_COHORT:GATK4_DETERMINEGERMLINECONTIGPLOIDY' { diff --git a/conf/test.config b/conf/test.config index 41e6bed..550e42d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -26,8 +26,8 @@ params { tools = 'cnvkit' //Germlinecnvcaller options - scatter_content = 2 - ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" + gcnv_scatter_content = 2 + gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" // Small reference genome genome = null diff --git a/docs/usage.md b/docs/usage.md index c2ac6a6..4f9d2a4 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -111,10 +111,10 @@ If you wish to share such profile (such as upload as supplementary material for If you are running the pipeline to generate references for the GATK's germlinecnvcalling workflow, you should ensure that you have provided all the mandatory options specified in the table below. 
-| Mandatory | Optional | -| ------------------------- | -------- | -| fasta/genomes | fai | -| ploidy_priors1 | dict | +| Mandatory | Optional | +| ------------------------------ | -------- | +| fasta/genomes | fai | +| gcnv_ploidy_priors1 | dict | 1 To learn more about this file, see [this comment](https://gatk.broadinstitute.org/hc/en-us/community/posts/360074399831/comments/13441240230299) on GATK forum.
diff --git a/nextflow.config b/nextflow.config index b5fe870..e355fd0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,13 +20,16 @@ params { tools = null // No default, must be specified // Germlinecnvcaller options - bin_length = 1000 - mappable_regions = null - padding = 0 - ploidy_priors = null - readcount_format = 'HDF5' - scatter_content = 5000 - segmental_duplications = null + gcnv_bin_length = 1000 + gcnv_mappable_regions = null + gcnv_padding = 0 + gcnv_ploidy_priors = null + gcnv_readcount_format = 'HDF5' + gcnv_scatter_content = 5000 + gcnv_segmental_duplications = null + + // Germlinecnvcaller options + gens_bin_length = 100 // CNVkit options cnvkit_targets = null @@ -253,6 +256,7 @@ manifest { includeConfig 'conf/modules/base.config' includeConfig 'conf/modules/cnvkit.config' includeConfig 'conf/modules/germlinecnvcaller_cohort.config' +includeConfig 'conf/modules/gens_pon.config' // Function to ensure that resource requirements don't go beyond // a maximum limit diff --git a/nextflow_schema.json b/nextflow_schema.json index 5a0a915..613f33f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -11,14 +11,14 @@ "description": "Options used by the germlinecnvcaller subworkflow", "default": "", "properties": { - "bin_length": { + "gcnv_bin_length": { "type": "number", "default": 1000, "description": "Length (in bp) of the bins. If zero, no binning will be performed.", "fa_icon": "fas fa-sort-numeric-down", "help_text": "Used by GATK's PreprocessIntervals. GATK recommends a bin length of 1000 for WGS analysis, and 0 for WES analysis. " }, - "mappable_regions": { + "gcnv_mappable_regions": { "type": "string", "exists": true, "description": "Path to Umap single-read mappability track in .bed or .bed.gz format. Overlapping intervals must be merged.", @@ -26,14 +26,14 @@ "fa_icon": "fas fa-file", "help_text": "Used by GATK's AnnotateIntervals." 
}, - "padding": { + "gcnv_padding": { "type": "number", "description": "Length (in bp) of the padding regions on each side of the intervals.", "default": 0, "fa_icon": "fas fa-sort-numeric-down", "help_text": "Used by GATK's PreprocessIntervals. GATK recommends a padding of 0 for WGS analysis, and 250 for WES analysis." }, - "ploidy_priors": { + "gcnv_ploidy_priors": { "type": "string", "exists": true, "format": "file-path", @@ -42,21 +42,21 @@ "fa_icon": "fas fa-file", "help_text": "Used by GATK's DeterminGermlineContigPloidy." }, - "readcount_format": { + "gcnv_readcount_format": { "type": "string", "description": "Output file format for count data", "default": "HDF5", "fa_icon": "fas fa-align-left", "enum": ["HDF5", "TSV"] }, - "scatter_content": { + "gcnv_scatter_content": { "type": "number", "description": "When scattering with this argument, each of the resultant files will (ideally) have this amount of interval-counts.", "default": 5000, "fa_icon": "fas fa-sort-numeric-down", "help_text": "Used by GATK/Picards's IntervalListTools." }, - "segmental_duplications": { + "gcnv_segmental_duplications": { "type": "string", "exists": true, "description": "Path to segmental-duplication track in .bed or .bed.gz format. Overlapping intervals must be merged.", @@ -66,6 +66,28 @@ } } }, + "gens_options": { + "title": "GENS options", + "type": "object", + "description": "Options used by the gens subworkflow", + "default": "", + "properties": { + "gens_bin_length": { + "type": "number", + "default": 100, + "description": "Length (in bp) of the bins. If zero, no binning will be performed.", + "fa_icon": "fas fa-sort-numeric-down", + "help_text": "Used by GATK's PreprocessIntervals. We recommend a bin length of 100." 
+ }, + "gens_readcount_format": { + "type": "string", + "description": "Output file format for count data", + "default": "HDF5", + "fa_icon": "fas fa-align-left", + "enum": ["HDF5", "TSV"] + } + } + }, "cnvkit_options": { "title": "CNVkit options", "type": "object", @@ -134,7 +156,7 @@ "fa_icon": "fas fa-toolbox", "description": "Tools to use for building Panel of Normals or models.", "help_text": "Multiple tools separated with commas.\n\nTools available: CNVKIT,germlinecnvcaller", - "pattern": "^((cnvkit|germlinecnvcaller)?,?)*(? + alignment_with_index: index.size() > 0 + return [meta, alignment, index] + alignment_without_index: index.size() == 0 + return [meta, alignment] + } + .set { ch_for_mix } + + SAMTOOLS_INDEX (ch_for_mix.alignment_without_index) + + SAMTOOLS_INDEX.out.bai + .mix(SAMTOOLS_INDEX.out.crai) + .set { ch_index } + + // + // Collect alignment files and their indices + // + ch_for_mix.alignment_without_index + .join(ch_index) + .mix(ch_for_mix.alignment_with_index) + .combine(GATK4_PREPROCESSINTERVALS.out.interval_list.map{it -> it[1]}) + .set {ch_readcounts_in} + + // + // Collect read counts, and generate models + // + GATK4_COLLECTREADCOUNTS (ch_readcounts_in, + ch_fasta, + ch_fai, + ch_dict) + + GATK4_COLLECTREADCOUNTS.out.tsv + .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) + .set { ch_readcounts_out } + + + GATK4_CREATEREADCOUNTPANELOFNORMALS (ch_readcounts_out) + + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + ch_versions = ch_versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) + ch_versions = ch_versions.mix(GATK4_COLLECTREADCOUNTS.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_CREATEREADCOUNTPANELOFNORMALS.out.versions.first()) + + emit: + genspon = GATK4_CREATEREADCOUNTPANELOFNORMALS.out.pon + readcounts = ch_readcounts_out + versions = ch_versions +} diff --git 
a/tests/pipeline/germlinecnvcaller_cohort.nf.test b/tests/pipeline/germlinecnvcaller_cohort.nf.test index febc7da..2741b5f 100644 --- a/tests/pipeline/germlinecnvcaller_cohort.nf.test +++ b/tests/pipeline/germlinecnvcaller_cohort.nf.test @@ -11,7 +11,7 @@ nextflow_workflow { params { outdir = "$outputDir" tools = 'germlinecnvcaller' - scatter_content = 2 + gcnv_scatter_content = 2 fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" } diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 6e768c8..c22874d 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -43,16 +43,16 @@ ch_input = ch_from_samplesheet.map{meta, bam, bai, cram, crai -> } // Initialize file channels based on params, defined in the params.genomes[params.genome] scope -ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() - : Channel.empty() -ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() - : Channel.empty() -ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() - : Channel.empty() -ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() - : Channel.empty() -ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) +ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() + : Channel.empty() +ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() + : Channel.empty() +ch_fasta = params.fasta ? 
Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() + : Channel.empty() +ch_ploidy_priors = params.gcnv_ploidy_priors ? Channel.fromPath(params.gcnv_ploidy_priors).collect() + : Channel.empty() +ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName],targets]}.collect() + : Channel.value([[:],[]]) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -75,6 +75,7 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // +include { GENS_PON } from '../subworkflows/local/gens_pon' include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvcaller_cohort' /* @@ -134,6 +135,16 @@ workflow CREATEPANELREFS { ch_versions = ch_versions.mix(GERMLINECNVCALLER_COHORT.out.versions) } + if (params.tools && params.tools.split(',').contains('gens')) { + + GENS_PON(ch_dict, + ch_fai, + ch_fasta, + ch_input) + + ch_versions = ch_versions.mix(GENS_PON.out.versions) + } + CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) From fe0ab74e9f8247792474567888783d11138f9b6a Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 29 Jan 2024 22:38:10 +0100 Subject: [PATCH 068/234] update indent and comment --- conf/test.config | 2 +- nextflow.config | 2 +- subworkflows/local/gens_pon.nf | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/conf/test.config b/conf/test.config index 550e42d..00bbe3b 100644 --- a/conf/test.config +++ b/conf/test.config @@ -27,7 +27,7 @@ params { //Germlinecnvcaller options gcnv_scatter_content = 2 - gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" + gcnv_ploidy_priors = 
"https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" // Small reference genome genome = null diff --git a/nextflow.config b/nextflow.config index e355fd0..d51a0aa 100644 --- a/nextflow.config +++ b/nextflow.config @@ -28,7 +28,7 @@ params { gcnv_scatter_content = 5000 gcnv_segmental_duplications = null - // Germlinecnvcaller options + // Gens options gens_bin_length = 100 // CNVkit options diff --git a/subworkflows/local/gens_pon.nf b/subworkflows/local/gens_pon.nf index 8dc2ec2..1bf8e57 100644 --- a/subworkflows/local/gens_pon.nf +++ b/subworkflows/local/gens_pon.nf @@ -1,9 +1,9 @@ include { GATK4_COLLECTREADCOUNTS } from '../../modules/nf-core/gatk4/collectreadcounts/main' +include { GATK4_CREATEREADCOUNTPANELOFNORMALS } from '../../modules/nf-core/gatk4/createreadcountpanelofnormals/main' include { GATK4_PREPROCESSINTERVALS } from '../../modules/nf-core/gatk4/preprocessintervals/main' include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main' include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' -include { GATK4_CREATEREADCOUNTPANELOFNORMALS } from '../../modules/nf-core/gatk4/createreadcountpanelofnormals/main' workflow GENS_PON { take: @@ -80,12 +80,12 @@ workflow GENS_PON { GATK4_CREATEREADCOUNTPANELOFNORMALS (ch_readcounts_out) - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) - ch_versions = ch_versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) ch_versions = ch_versions.mix(GATK4_COLLECTREADCOUNTS.out.versions.first()) ch_versions = ch_versions.mix(GATK4_CREATEREADCOUNTPANELOFNORMALS.out.versions.first()) + ch_versions = 
ch_versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) + ch_versions = ch_versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) emit: genspon = GATK4_CREATEREADCOUNTPANELOFNORMALS.out.pon From bfe4541dca4296911ef53c46f5e52fbcd308f68d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 15 Feb 2024 16:17:38 +0100 Subject: [PATCH 069/234] add bedtointervalslist --- main.nf | 8 ++- modules.json | 5 ++ .../gatk4/bedtointervallist/environment.yml | 7 +++ .../nf-core/gatk4/bedtointervallist/main.nf | 56 +++++++++++++++++++ .../nf-core/gatk4/bedtointervallist/meta.yml | 51 +++++++++++++++++ nextflow.config | 1 + nextflow_schema.json | 25 +++++++++ .../local/germlinecnvcaller_cohort.nf | 12 ++-- workflows/createpanelrefs.nf | 28 ++++++---- 9 files changed, 174 insertions(+), 19 deletions(-) create mode 100644 modules/nf-core/gatk4/bedtointervallist/environment.yml create mode 100644 modules/nf-core/gatk4/bedtointervallist/main.nf create mode 100644 modules/nf-core/gatk4/bedtointervallist/meta.yml diff --git a/main.nf b/main.nf index cb31f5e..0b453b3 100644 --- a/main.nf +++ b/main.nf @@ -16,9 +16,11 @@ nextflow.enable.dsl = 2 GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') -params.fai = WorkflowMain.getGenomeAttribute(params, 'fai') -params.dict = WorkflowMain.getGenomeAttribute(params, 'dict') +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.fai = WorkflowMain.getGenomeAttribute(params, 'fai') +params.dict = WorkflowMain.getGenomeAttribute(params, 'dict') +params.target_bed = WorkflowMain.getGenomeAttribute(params, 'target_bed') +params.target_interval_list = WorkflowMain.getGenomeAttribute(params, 'target_interval_list') /* 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VALIDATE & PRINT PARAMETER SUMMARY diff --git a/modules.json b/modules.json index 8abdadf..bb06eb2 100644 --- a/modules.json +++ b/modules.json @@ -20,6 +20,11 @@ "git_sha": "42ae163c3c6eb23646189c30c07a889ad39c9b0e", "installed_by": ["modules"] }, + "gatk4/bedtointervallist": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, "gatk4/collectreadcounts": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", diff --git a/modules/nf-core/gatk4/bedtointervallist/environment.yml b/modules/nf-core/gatk4/bedtointervallist/environment.yml new file mode 100644 index 0000000..e7cb428 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_bedtointervallist +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/bedtointervallist/main.nf b/modules/nf-core/gatk4/bedtointervallist/main.nf new file mode 100644 index 0000000..88b24b1 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/main.nf @@ -0,0 +1,56 @@ +process GATK4_BEDTOINTERVALLIST { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(bed) + tuple val(meta2), path(dict) + + output: + tuple val(meta), path('*.interval_list'), emit: interval_list + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK BedToIntervalList] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + BedToIntervalList \\ + --INPUT $bed \\ + --OUTPUT ${prefix}.interval_list \\ + --SEQUENCE_DICTIONARY $dict \\ + --TMP_DIR . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.interval_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/bedtointervallist/meta.yml b/modules/nf-core/gatk4/bedtointervallist/meta.yml new file mode 100644 index 0000000..187da88 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/meta.yml @@ -0,0 +1,51 @@ +name: gatk4_bedtointervallist +description: Creates an interval list from a bed file and a reference dict +keywords: + - bed + - bedtointervallist + - gatk4 + - interval list +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. 
Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - bed: + type: file + description: Input bed file + pattern: "*.bed" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - dict: + type: file + description: Sequence dictionary + pattern: "*.dict" +output: + - interval_list: + type: file + description: gatk interval list file + pattern: "*.interval_list" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@ramprasadn" +maintainers: + - "@kevinmenden" + - "@ramprasadn" diff --git a/nextflow.config b/nextflow.config index b5fe870..cb43412 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,6 +20,7 @@ params { tools = null // No default, must be specified // Germlinecnvcaller options + analysis_type = 'wgs' bin_length = 1000 mappable_regions = null padding = 0 diff --git a/nextflow_schema.json b/nextflow_schema.json index 5a0a915..9c72638 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -11,6 +11,13 @@ "description": "Options used by the germlinecnvcaller subworkflow", "default": "", "properties": { + "analysis_type": { + "type": "string", + "default": "wgs", + "description": "Specifies which analysis type for the pipeline- either 'wgs' or 'wes'.", + "fa_icon": "fas fa-align-center", + "enum": ["wgs", "wes"] + }, "bin_length": { "type": "number", "default": 1000, @@ -63,6 +70,24 @@ "format": "file-path", "fa_icon": "fas fa-file", "help_text": "Used by GATK's AnnotateIntervals." 
+ }, + "target_bed": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-file", + "pattern": "^\\S+\\.bed(\\.gz)?$", + "description": "Path to directory for target bed file.", + "help_text": "If the regions you would like to analyse are in bed format, use this option. If you have an interval_list file, use `target_interval_list` parameter instead." + }, + "target_interval_list": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-file", + "pattern": "^\\S+\\._interval_list$", + "description": "Path to directory for target interval_list file.", + "help_text": "If the regions you would like to analyse are in interval_list format, use this option. If you have a bed file, use `target_bed` parameter instead." } } }, diff --git a/subworkflows/local/germlinecnvcaller_cohort.nf b/subworkflows/local/germlinecnvcaller_cohort.nf index 4e1b9ef..4a09914 100644 --- a/subworkflows/local/germlinecnvcaller_cohort.nf +++ b/subworkflows/local/germlinecnvcaller_cohort.nf @@ -11,11 +11,13 @@ include { SAMTOOLS_INDEX } from '../../modules/nf-core/samt workflow GERMLINECNVCALLER_COHORT { take: - ch_user_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_user_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] - ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] + ch_user_dict // channel: [mandatory] [ val(meta), path(dict) ] + ch_user_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] + ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] + ch_target_bed // channel: [mandatory] [ val(meta), path(bed) ] + ch_target_interval_list // channel: [mandatory] [ val(meta), path(intervals) ] main: ch_versions = Channel.empty() diff --git 
a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 6e768c8..cf2e750 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -43,16 +43,20 @@ ch_input = ch_from_samplesheet.map{meta, bam, bai, cram, crai -> } // Initialize file channels based on params, defined in the params.genomes[params.genome] scope -ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() - : Channel.empty() -ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() - : Channel.empty() -ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() - : Channel.empty() -ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() - : Channel.empty() -ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) +ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName],targets]}.collect() + : Channel.value([[:],[]]) +ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() + : Channel.empty() +ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() + : Channel.empty() +ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() + : Channel.empty() +ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() + : Channel.empty() +ch_target_bed = params.target_bed ? Channel.fromPath(params.target_bed).map { targets -> [[id:targets.baseName],targets]}.collect() + : Channel.value([[:],[]]) +ch_target_interval_list = params.target_interval_list ? 
Channel.fromPath(params.target_interval_list).map { targets -> [[id:targets.baseName],targets]}.collect() + : Channel.value([[:],[]]) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -129,7 +133,9 @@ workflow CREATEPANELREFS { ch_fai, ch_fasta, ch_input, - ch_ploidy_priors) + ch_ploidy_priors, + ch_target_bed, + ch_target_interval_list) ch_versions = ch_versions.mix(GERMLINECNVCALLER_COHORT.out.versions) } From 0f87bfd995418495097968c8d5a151cbceffa57e Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 15 Feb 2024 22:38:00 +0100 Subject: [PATCH 070/234] bedtointervallist in subworkflow --- conf/modules/germlinecnvcaller_cohort.config | 4 ++ .../local/germlinecnvcaller_cohort.nf | 40 ++++++++++++++----- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/conf/modules/germlinecnvcaller_cohort.config b/conf/modules/germlinecnvcaller_cohort.config index b7204f5..9c417df 100644 --- a/conf/modules/germlinecnvcaller_cohort.config +++ b/conf/modules/germlinecnvcaller_cohort.config @@ -36,6 +36,10 @@ process { ] } + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_BEDTOINTERVALLIST' { + ext.when = { params.analysis_type.equals("wes") && !params.target_interval_list } + } + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS' { ext.args = { ["--imr OVERLAPPING_ONLY", "--padding ${params.padding}", diff --git a/subworkflows/local/germlinecnvcaller_cohort.nf b/subworkflows/local/germlinecnvcaller_cohort.nf index 4a09914..68aa48a 100644 --- a/subworkflows/local/germlinecnvcaller_cohort.nf +++ b/subworkflows/local/germlinecnvcaller_cohort.nf @@ -1,4 +1,5 @@ include { GATK4_ANNOTATEINTERVALS } from '../../modules/nf-core/gatk4/annotateintervals/main' +include { GATK4_BEDTOINTERVALLIST } from '../../modules/nf-core/gatk4/bedtointervallist/main' include { GATK4_COLLECTREADCOUNTS } from '../../modules/nf-core/gatk4/collectreadcounts/main' include { 
GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../modules/nf-core/gatk4/determinegermlinecontigploidy/main' include { GATK4_FILTERINTERVALS } from '../../modules/nf-core/gatk4/filterintervals/main' @@ -11,13 +12,13 @@ include { SAMTOOLS_INDEX } from '../../modules/nf-core/samt workflow GERMLINECNVCALLER_COHORT { take: - ch_user_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_user_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] - ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] - ch_target_bed // channel: [mandatory] [ val(meta), path(bed) ] - ch_target_interval_list // channel: [mandatory] [ val(meta), path(intervals) ] + ch_user_dict // channel: [mandatory] [ val(meta), path(dict) ] + ch_user_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] + ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] + ch_target_bed // channel: [mandatory] [ val(meta), path(bed) ] + ch_user_target_interval_list // channel: [mandatory] [ val(meta), path(intervals) ] main: ch_versions = Channel.empty() @@ -39,10 +40,27 @@ workflow GERMLINECNVCALLER_COHORT { .collect() .set { ch_fai } - GATK4_PREPROCESSINTERVALS (ch_fasta, - ch_fai, - ch_dict, - [[:],[]], [[:],[]]) + GATK4_BEDTOINTERVALLIST (ch_target_bed, ch_dict) //Runs for wes analysis, when target_bed file is provided instead of target_interval_list + + ch_user_target_interval_list + .combine(GATK4_BEDTOINTERVALLIST.out.interval_list) + .branch { it -> // If CADD is run, then "it" will be [[meta],selvar.vcf,cadd.vcf], else [[meta],selvar.vcf,null] + intervallistfrompath: it[2].equals(null) + return [it[0], it[1]] + intervallistfrombed: !(it[2].equals(null)) + return [it[2], it[3]] + } + .set { ch_for_mix } + + 
ch_for_mix.intervallistfrompath.mix(ch_for_mix.intervallistfrombed) + .collect() + .set { ch_target_interval_list } + + GATK4_PREPROCESSINTERVALS ( ch_fasta, + ch_fai, + ch_dict, + ch_target_interval_list, + [[:],[]]) GATK4_ANNOTATEINTERVALS (GATK4_PREPROCESSINTERVALS.out.interval_list, ch_fasta, From 0646bdcf39ae1adb375c3eb7b7f46b1b175fc566 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 20 Feb 2024 08:49:05 +0100 Subject: [PATCH 071/234] add options to exclude intervals --- conf/modules/germlinecnvcaller_cohort.config | 8 ++- main.nf | 12 ++-- nextflow_schema.json | 20 +++++- .../local/germlinecnvcaller_cohort.nf | 70 ++++++++++++------- workflows/createpanelrefs.nf | 36 ++++++---- 5 files changed, 98 insertions(+), 48 deletions(-) diff --git a/conf/modules/germlinecnvcaller_cohort.config b/conf/modules/germlinecnvcaller_cohort.config index 9c417df..2785d89 100644 --- a/conf/modules/germlinecnvcaller_cohort.config +++ b/conf/modules/germlinecnvcaller_cohort.config @@ -36,8 +36,12 @@ process { ] } - withName: '.*GERMLINECNVCALLER_COHORT:GATK4_BEDTOINTERVALLIST' { - ext.when = { params.analysis_type.equals("wes") && !params.target_interval_list } + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_BEDTOINTERVALLIST_TARGETS' { + ext.when = { params.analysis_type.equals("wes") && params.target_interval_list.equals(null) && params.target_bed } + } + + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_BEDTOINTERVALLIST_EXCLUDE' { + ext.when = { params.analysis_type.equals("wes") && params.exclude_interval_list.equals(null) && params.exclude_bed } } withName: '.*GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS' { diff --git a/main.nf b/main.nf index 0b453b3..5ed2a9c 100644 --- a/main.nf +++ b/main.nf @@ -16,11 +16,13 @@ nextflow.enable.dsl = 2 GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 
'fasta') -params.fai = WorkflowMain.getGenomeAttribute(params, 'fai') -params.dict = WorkflowMain.getGenomeAttribute(params, 'dict') -params.target_bed = WorkflowMain.getGenomeAttribute(params, 'target_bed') -params.target_interval_list = WorkflowMain.getGenomeAttribute(params, 'target_interval_list') +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.fai = WorkflowMain.getGenomeAttribute(params, 'fai') +params.dict = WorkflowMain.getGenomeAttribute(params, 'dict') +params.target_bed = WorkflowMain.getGenomeAttribute(params, 'target_bed') +params.target_interval_list = WorkflowMain.getGenomeAttribute(params, 'target_interval_list') +params.exclude_bed = WorkflowMain.getGenomeAttribute(params, 'exclude_bed') +params.exclude_interval_list = WorkflowMain.getGenomeAttribute(params, 'exclude_interval_list') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VALIDATE & PRINT PARAMETER SUMMARY diff --git a/nextflow_schema.json b/nextflow_schema.json index 9c72638..37f5795 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -25,6 +25,24 @@ "fa_icon": "fas fa-sort-numeric-down", "help_text": "Used by GATK's PreprocessIntervals. GATK recommends a bin length of 1000 for WGS analysis, and 0 for WES analysis. " }, + "exclude_bed": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-file", + "pattern": "^\\S+\\.bed$", + "description": "Path to directory for a bed file containing regions to be exluded from the analysis.", + "help_text": "If the regions you would like to exclude are in bed format, use this option. If you have an interval_list file, use `exclude_interval_list` parameter instead." 
+ }, + "exclude_interval_list": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-file", + "pattern": "^\\S+\\._interval_list$", + "description": "Path to directory for exclude_interval_list file.", + "help_text": "If the regions you would like to exclude are in interval_list format, use this option. If you have a bed file, use `exclude` parameter instead." + }, "mappable_regions": { "type": "string", "exists": true, @@ -76,7 +94,7 @@ "exists": true, "format": "path", "fa_icon": "fas fa-file", - "pattern": "^\\S+\\.bed(\\.gz)?$", + "pattern": "^\\S+\\.bed$", "description": "Path to directory for target bed file.", "help_text": "If the regions you would like to analyse are in bed format, use this option. If you have an interval_list file, use `target_interval_list` parameter instead." }, diff --git a/subworkflows/local/germlinecnvcaller_cohort.nf b/subworkflows/local/germlinecnvcaller_cohort.nf index 68aa48a..facab74 100644 --- a/subworkflows/local/germlinecnvcaller_cohort.nf +++ b/subworkflows/local/germlinecnvcaller_cohort.nf @@ -1,24 +1,27 @@ -include { GATK4_ANNOTATEINTERVALS } from '../../modules/nf-core/gatk4/annotateintervals/main' -include { GATK4_BEDTOINTERVALLIST } from '../../modules/nf-core/gatk4/bedtointervallist/main' -include { GATK4_COLLECTREADCOUNTS } from '../../modules/nf-core/gatk4/collectreadcounts/main' -include { GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../modules/nf-core/gatk4/determinegermlinecontigploidy/main' -include { GATK4_FILTERINTERVALS } from '../../modules/nf-core/gatk4/filterintervals/main' -include { GATK4_GERMLINECNVCALLER } from '../../modules/nf-core/gatk4/germlinecnvcaller/main' -include { GATK4_INTERVALLISTTOOLS } from '../../modules/nf-core/gatk4/intervallisttools/main' -include { GATK4_PREPROCESSINTERVALS } from '../../modules/nf-core/gatk4/preprocessintervals/main' -include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main' -include { 
SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { GATK4_ANNOTATEINTERVALS } from '../../modules/nf-core/gatk4/annotateintervals/main' +include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_TARGETS } from '../../modules/nf-core/gatk4/bedtointervallist/main' +include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_EXCLUDE } from '../../modules/nf-core/gatk4/bedtointervallist/main' +include { GATK4_COLLECTREADCOUNTS } from '../../modules/nf-core/gatk4/collectreadcounts/main' +include { GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../modules/nf-core/gatk4/determinegermlinecontigploidy/main' +include { GATK4_FILTERINTERVALS } from '../../modules/nf-core/gatk4/filterintervals/main' +include { GATK4_GERMLINECNVCALLER } from '../../modules/nf-core/gatk4/germlinecnvcaller/main' +include { GATK4_INTERVALLISTTOOLS } from '../../modules/nf-core/gatk4/intervallisttools/main' +include { GATK4_PREPROCESSINTERVALS } from '../../modules/nf-core/gatk4/preprocessintervals/main' +include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main' +include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' workflow GERMLINECNVCALLER_COHORT { take: - ch_user_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_user_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] - ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] - ch_target_bed // channel: [mandatory] [ val(meta), path(bed) ] - ch_user_target_interval_list // channel: [mandatory] [ val(meta), path(intervals) ] + ch_user_dict // channel: [mandatory] [ val(meta), path(dict) ] + ch_user_fai // channel: [mandatory] [ val(meta), 
path(fai) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] + ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] + ch_target_bed // channel: [mandatory] [ val(meta), path(bed) ] + ch_user_target_interval_list // channel: [mandatory] [ val(meta), path(intervals) ] + ch_exclude_bed // channel: [mandatory] [ val(meta), path(bed) ] + ch_user_exclude_interval_list // channel: [mandatory] [ val(meta), path(intervals) ] main: ch_versions = Channel.empty() @@ -40,27 +43,42 @@ workflow GERMLINECNVCALLER_COHORT { .collect() .set { ch_fai } - GATK4_BEDTOINTERVALLIST (ch_target_bed, ch_dict) //Runs for wes analysis, when target_bed file is provided instead of target_interval_list + GATK4_BEDTOINTERVALLIST_TARGETS (ch_target_bed, ch_dict) //Runs for wes analysis, when target_bed file is provided instead of target_interval_list + GATK4_BEDTOINTERVALLIST_EXCLUDE (ch_exclude_bed, ch_dict) //Runs for wes analysis, when exclude_bed file is provided instead of target_interval_list ch_user_target_interval_list - .combine(GATK4_BEDTOINTERVALLIST.out.interval_list) - .branch { it -> // If CADD is run, then "it" will be [[meta],selvar.vcf,cadd.vcf], else [[meta],selvar.vcf,null] + .combine(GATK4_BEDTOINTERVALLIST_TARGETS.out.interval_list.ifEmpty(null)) + .branch { it -> intervallistfrompath: it[2].equals(null) return [it[0], it[1]] intervallistfrombed: !(it[2].equals(null)) return [it[2], it[3]] } - .set { ch_for_mix } + .set { ch_targets_for_mix } - ch_for_mix.intervallistfrompath.mix(ch_for_mix.intervallistfrombed) + ch_targets_for_mix.intervallistfrompath.mix(ch_targets_for_mix.intervallistfrombed) .collect() - .set { ch_target_interval_list } + .set {ch_target_interval_list} + + ch_user_exclude_interval_list + .combine(GATK4_BEDTOINTERVALLIST_EXCLUDE.out.interval_list.ifEmpty(null)) + .branch { it -> + intervallistfrompath: it[2].equals(null) + return [it[0], it[1]] + 
intervallistfrombed: !(it[2].equals(null)) + return [it[2], it[3]] + } + .set { ch_exclude_for_mix } + + ch_exclude_for_mix.intervallistfrompath.mix(ch_exclude_for_mix.intervallistfrombed) + .collect() + .set { ch_exclude_interval_list } GATK4_PREPROCESSINTERVALS ( ch_fasta, ch_fai, ch_dict, ch_target_interval_list, - [[:],[]]) + ch_exclude_interval_list) GATK4_ANNOTATEINTERVALS (GATK4_PREPROCESSINTERVALS.out.interval_list, ch_fasta, @@ -141,6 +159,8 @@ workflow GERMLINECNVCALLER_COHORT { ch_versions = ch_versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) ch_versions = ch_versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) + ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST_TARGETS.out.versions) + ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST_EXCLUDE.out.versions) ch_versions = ch_versions.mix(GATK4_COLLECTREADCOUNTS.out.versions.first()) ch_versions = ch_versions.mix(GATK4_ANNOTATEINTERVALS.out.versions) ch_versions = ch_versions.mix(GATK4_FILTERINTERVALS.out.versions) diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index cf2e750..e16b52d 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -43,20 +43,24 @@ ch_input = ch_from_samplesheet.map{meta, bam, bai, cram, crai -> } // Initialize file channels based on params, defined in the params.genomes[params.genome] scope -ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) -ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() - : Channel.empty() -ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() - : Channel.empty() -ch_fasta = params.fasta ? 
Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() - : Channel.empty() -ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() - : Channel.empty() -ch_target_bed = params.target_bed ? Channel.fromPath(params.target_bed).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) -ch_target_interval_list = params.target_interval_list ? Channel.fromPath(params.target_interval_list).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) +ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName],targets]}.collect() + : Channel.value([[:],[]]) +ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() + : Channel.empty() +ch_exclude_bed = params.exclude_bed ? Channel.fromPath(params.exclude_bed).map { exclude -> [[id:exclude.baseName],exclude]}.collect() + : Channel.value([[:],[]]) +ch_exclude_interval_list = params.exclude_interval_list ? Channel.fromPath(params.exclude_interval_list).map { exclude -> [[id:exclude.baseName],exclude]}.collect() + : Channel.value([[:],[]]) +ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() + : Channel.empty() +ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() + : Channel.empty() +ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() + : Channel.empty() +ch_target_bed = params.target_bed ? Channel.fromPath(params.target_bed).map { targets -> [[id:targets.baseName],targets]}.collect() + : Channel.value([[:],[]]) +ch_target_interval_list = params.target_interval_list ? 
Channel.fromPath(params.target_interval_list).map { targets -> [[id:targets.baseName],targets]}.collect() + : Channel.value([[:],[]]) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -135,7 +139,9 @@ workflow CREATEPANELREFS { ch_input, ch_ploidy_priors, ch_target_bed, - ch_target_interval_list) + ch_target_interval_list, + ch_exclude_bed, + ch_exclude_interval_list) ch_versions = ch_versions.mix(GERMLINECNVCALLER_COHORT.out.versions) } From 08a2ef3a1897aad11fd89bf5c0bc5301bc0a23d7 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 21 Feb 2024 10:27:16 +0100 Subject: [PATCH 072/234] devcontainer --- .devcontainer/devcontainer.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4a9bc5c..4ecfbfe 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -18,11 +18,11 @@ "python.linting.flake8Path": "/opt/conda/bin/flake8", "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint", + "python.linting.pylintPath": "/opt/conda/bin/pylint" }, // Add the IDs of extensions you want installed when the container is created. 
- "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"], - }, - }, + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } } From 00bc062bd272ce548afda9e8d7ff75c486e10c1e Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 21 Feb 2024 10:37:40 +0100 Subject: [PATCH 073/234] fix lint --- .../local/germlinecnvcaller_cohort.nf | 52 +++++++++---------- workflows/createpanelrefs.nf | 10 ++-- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/subworkflows/local/germlinecnvcaller_cohort.nf b/subworkflows/local/germlinecnvcaller_cohort.nf index 4e1b9ef..750764b 100644 --- a/subworkflows/local/germlinecnvcaller_cohort.nf +++ b/subworkflows/local/germlinecnvcaller_cohort.nf @@ -23,9 +23,9 @@ workflow GERMLINECNVCALLER_COHORT { // // Prepare references // - SAMTOOLS_FAIDX (ch_fasta, [[:],[]]) + SAMTOOLS_FAIDX ( ch_fasta, [[:],[]] ) - PICARD_CREATESEQUENCEDICTIONARY (ch_fasta) + PICARD_CREATESEQUENCEDICTIONARY ( ch_fasta ) ch_user_dict .mix(PICARD_CREATESEQUENCEDICTIONARY.out.reference_dict) @@ -37,16 +37,16 @@ workflow GERMLINECNVCALLER_COHORT { .collect() .set { ch_fai } - GATK4_PREPROCESSINTERVALS (ch_fasta, - ch_fai, - ch_dict, - [[:],[]], [[:],[]]) + GATK4_PREPROCESSINTERVALS ( ch_fasta, + ch_fai, + ch_dict, + [[:],[]], [[:],[]] ) - GATK4_ANNOTATEINTERVALS (GATK4_PREPROCESSINTERVALS.out.interval_list, - ch_fasta, - ch_fai, - ch_dict, - [[:],[]], [[:],[]], [[:],[]], [[:],[]]) + GATK4_ANNOTATEINTERVALS ( GATK4_PREPROCESSINTERVALS.out.interval_list, + ch_fasta, + ch_fai, + ch_dict, + [[:],[]], [[:],[]], [[:],[]], [[:],[]]) // // Filter out files that lack indices, and generate them @@ -60,11 +60,11 @@ workflow GERMLINECNVCALLER_COHORT { } .set { ch_for_mix } - SAMTOOLS_INDEX (ch_for_mix.alignment_without_index) + SAMTOOLS_INDEX ( ch_for_mix.alignment_without_index ) SAMTOOLS_INDEX.out.bai - 
.mix(SAMTOOLS_INDEX.out.crai) - .set { ch_index } + .mix(SAMTOOLS_INDEX.out.crai) + .set { ch_index } // // Collect alignment files and their indices @@ -78,10 +78,10 @@ workflow GERMLINECNVCALLER_COHORT { // // Collect read counts, and generate models // - GATK4_COLLECTREADCOUNTS (ch_readcounts_in, - ch_fasta, - ch_fai, - ch_dict) + GATK4_COLLECTREADCOUNTS ( ch_readcounts_in, + ch_fasta, + ch_fai, + ch_dict ) GATK4_COLLECTREADCOUNTS.out.tsv .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) @@ -90,11 +90,11 @@ workflow GERMLINECNVCALLER_COHORT { .set { ch_readcounts_out } - GATK4_FILTERINTERVALS (GATK4_PREPROCESSINTERVALS.out.interval_list, - ch_readcounts_out, - GATK4_ANNOTATEINTERVALS.out.annotated_intervals) + GATK4_FILTERINTERVALS ( GATK4_PREPROCESSINTERVALS.out.interval_list, + ch_readcounts_out, + GATK4_ANNOTATEINTERVALS.out.annotated_intervals ) - GATK4_INTERVALLISTTOOLS(GATK4_FILTERINTERVALS.out.interval_list) + GATK4_INTERVALLISTTOOLS ( GATK4_FILTERINTERVALS.out.interval_list ) .interval_list .map {meta, it -> it} .flatten() @@ -105,9 +105,9 @@ workflow GERMLINECNVCALLER_COHORT { .map{ meta, counts, meta2, il -> [meta, counts, il, []] } .set {ch_contigploidy_in} - GATK4_DETERMINEGERMLINECONTIGPLOIDY (ch_contigploidy_in, - [[:],[]], - ch_ploidy_priors) + GATK4_DETERMINEGERMLINECONTIGPLOIDY ( ch_contigploidy_in, + [[:],[]], + ch_ploidy_priors ) ch_readcounts_out .combine(ch_intervallist_out) @@ -115,7 +115,7 @@ workflow GERMLINECNVCALLER_COHORT { .map{ meta, counts, il, meta2, calls -> [meta + [id:il.baseName], counts, il, calls, []] } .set {ch_cnvcaller_in} - GATK4_GERMLINECNVCALLER (ch_cnvcaller_in) + GATK4_GERMLINECNVCALLER ( ch_cnvcaller_in ) ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) ch_versions = ch_versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index d5fb38f..8629e23 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -93,11 +93,11 @@ 
workflow CREATEPANELREFS { } .set { ch_germlinecnvcaller_input } - GERMLINECNVCALLER_COHORT(ch_dict, - ch_fai, - ch_fasta, - ch_germlinecnvcaller_input, - ch_ploidy_priors) + GERMLINECNVCALLER_COHORT ( ch_dict, + ch_fai, + ch_fasta, + ch_germlinecnvcaller_input, + ch_ploidy_priors ) ch_versions = ch_versions.mix(GERMLINECNVCALLER_COHORT.out.versions) } From a510deeee90e98454b01b3fb5aa12111f00b139d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 21 Feb 2024 10:54:32 +0100 Subject: [PATCH 074/234] fix lint --- modules.json | 7 +------ modules/nf-core/multiqc/tests/main.nf.test.snap | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/modules.json b/modules.json index 8c0537e..02edf9b 100644 --- a/modules.json +++ b/modules.json @@ -10,11 +10,6 @@ "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", "installed_by": ["modules"] }, - "custom/dumpsoftwareversions": { - "branch": "master", - "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", - "installed_by": ["modules"] - }, "gatk4/annotateintervals": { "branch": "master", "git_sha": "42ae163c3c6eb23646189c30c07a889ad39c9b0e", @@ -52,7 +47,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", + "git_sha": "ccacf6f5de6df3bc6d73b665c1fd2933d8bbc290", "installed_by": ["modules"] }, "picard/createsequencedictionary": { diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index 3377ceb..c204b48 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -38,4 +38,4 @@ }, "timestamp": "2024-02-14T09:29:13.223621555" } -} +} \ No newline at end of file From ee63a3999136a2760aa48e08077940fba3109d80 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 21 Feb 2024 12:55:00 +0100 Subject: [PATCH 075/234] fix tests --- 
tests/config/tags.yml | 1 + tests/lib/UTILS.groovy | 2 +- tests/pipeline/cnvkit.nf.test | 2 +- tests/pipeline/cnvkit.nf.test.snap | 4 ++-- tests/pipeline/default.nf.test.snap | 4 ++-- tests/pipeline/germlinecnvcaller_cohort.config | 3 +++ tests/pipeline/germlinecnvcaller_cohort.nf.test | 1 + .../test_assets/germlinecnvcaller_software_versions.yaml | 6 +++--- workflows/createpanelrefs.nf | 8 ++------ 9 files changed, 16 insertions(+), 15 deletions(-) create mode 100644 tests/pipeline/germlinecnvcaller_cohort.config diff --git a/tests/config/tags.yml b/tests/config/tags.yml index a3dbda1..dd415e2 100644 --- a/tests/config/tags.yml +++ b/tests/config/tags.yml @@ -17,3 +17,4 @@ germlinecnvcaller_cohort: - conf/modules/germlinecnvcaller_cohort.config - subworkflows/local/germlinecnvcaller_cohort.nf - tests/pipeline/germlinecnvcaller_cohort.nf.test + - tests/pipeline/germlinecnvcaller_cohort.nf.config diff --git a/tests/lib/UTILS.groovy b/tests/lib/UTILS.groovy index 311403c..deacb58 100644 --- a/tests/lib/UTILS.groovy +++ b/tests/lib/UTILS.groovy @@ -2,7 +2,7 @@ class UTILS { public static String removeNextflowVersion(outputDir) { - def softwareVersions = path("$outputDir/pipeline_info/software_versions.yml").yaml + def softwareVersions = path("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").yaml if (softwareVersions.containsKey("Workflow")) { softwareVersions.Workflow.remove("Nextflow") } diff --git a/tests/pipeline/cnvkit.nf.test b/tests/pipeline/cnvkit.nf.test index dd255c3..115e5b9 100644 --- a/tests/pipeline/cnvkit.nf.test +++ b/tests/pipeline/cnvkit.nf.test @@ -1,6 +1,6 @@ nextflow_pipeline { - name "Test pipeline" + name "Test CNVKIT_BATCH" script "main.nf" tag "cnvkit" diff --git a/tests/pipeline/cnvkit.nf.test.snap b/tests/pipeline/cnvkit.nf.test.snap index 97975fc..7228c6b 100644 --- a/tests/pipeline/cnvkit.nf.test.snap +++ b/tests/pipeline/cnvkit.nf.test.snap @@ -1,9 +1,9 @@ { "software_versions": { "content": [ - 
"{CNVKIT_BATCH={cnvkit=0.9.10}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.7, yaml=5.4.1}, Workflow={nf-core/createpanelrefs=1.0dev}}" + "{CNVKIT_BATCH={cnvkit=0.9.10}, Workflow={nf-core/createpanelrefs=v1.0dev}}" ], - "timestamp": "2024-01-17T10:32:05.468312" + "timestamp": "2024-02-21T12:34:52.978702536" }, "cnvkit": { "content": [ diff --git a/tests/pipeline/default.nf.test.snap b/tests/pipeline/default.nf.test.snap index 4eb14f5..e921a19 100644 --- a/tests/pipeline/default.nf.test.snap +++ b/tests/pipeline/default.nf.test.snap @@ -1,9 +1,9 @@ { "software_versions": { "content": [ - "{CNVKIT_BATCH={cnvkit=0.9.10}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.7, yaml=5.4.1}, Workflow={nf-core/createpanelrefs=1.0dev}}" + "{CNVKIT_BATCH={cnvkit=0.9.10}, Workflow={nf-core/createpanelrefs=v1.0dev}}" ], - "timestamp": "2024-01-17T10:30:57.668525" + "timestamp": "2024-02-21T12:37:23.523857103" }, "cnvkit": { "content": [ diff --git a/tests/pipeline/germlinecnvcaller_cohort.config b/tests/pipeline/germlinecnvcaller_cohort.config new file mode 100644 index 0000000..defe024 --- /dev/null +++ b/tests/pipeline/germlinecnvcaller_cohort.config @@ -0,0 +1,3 @@ +env { + THEANO_FLAGS='base_compiledir=.' 
+} diff --git a/tests/pipeline/germlinecnvcaller_cohort.nf.test b/tests/pipeline/germlinecnvcaller_cohort.nf.test index febc7da..ab282c8 100644 --- a/tests/pipeline/germlinecnvcaller_cohort.nf.test +++ b/tests/pipeline/germlinecnvcaller_cohort.nf.test @@ -4,6 +4,7 @@ nextflow_workflow { script "subworkflows/local/germlinecnvcaller_cohort.nf" workflow "GERMLINECNVCALLER_COHORT" tag "germlinecnvcaller" + config "./germlinecnvcaller_cohort.config" test("Run germlinecnvcaller test") { diff --git a/tests/test_assets/germlinecnvcaller_software_versions.yaml b/tests/test_assets/germlinecnvcaller_software_versions.yaml index f7e4521..9d9a027 100644 --- a/tests/test_assets/germlinecnvcaller_software_versions.yaml +++ b/tests/test_assets/germlinecnvcaller_software_versions.yaml @@ -13,8 +13,8 @@ "GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS": gatk4: 4.4.0.0 "GERMLINECNVCALLER_COHORT:SAMTOOLS_FAIDX": - samtools: 1.17 + samtools: 1.18 "GERMLINECNVCALLER_COHORT:SAMTOOLS_INDEX": - samtools: 1.17 + samtools: 1.18 "GERMLINECNVCALLER_COHORT:PICARD_CREATESEQUENCEDICTIONARY": - picard: 3.0.0 + picard: 3.1.1 diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 8629e23..2c87546 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -68,15 +68,11 @@ workflow CREATEPANELREFS { if (params.tools && params.tools.split(',').contains('cnvkit')) { ch_samplesheet - .map{ meta, align, index -> + .map{ meta, bam, bai, cram, crai -> new_meta = meta + [id:"panel"] - [new_meta, align] + [new_meta, bam] } .groupTuple() - .branch{ - bam: it[0].data_type == "bam" - } - .bam .map {meta, bam -> [ meta, [], bam ]} .set { ch_cnvkit_input } From 4bbb357cf20c33d883007b500c7e42c287f5a3f7 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 21 Feb 2024 13:17:43 +0100 Subject: [PATCH 076/234] review suggestions and error fix --- main.nf | 1 - nextflow_schema.json | 1 - 
.../local/utils_nfcore_createpanelrefs_pipeline/main.nf | 2 +- tests/pipeline/germlinecnvcaller_cohort.nf.test | 2 +- 4 files changed, 2 insertions(+), 4 deletions(-) diff --git a/main.nf b/main.nf index 3ead0a6..c06659e 100644 --- a/main.nf +++ b/main.nf @@ -29,7 +29,6 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_crea ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// TODO nf-core: Remove this line if you don't need a FASTA file // This is an example of how to use getGenomeAttribute() to fetch parameters // from igenomes.config using `--genome` params.fasta = getGenomeAttribute('fasta') diff --git a/nextflow_schema.json b/nextflow_schema.json index 8d186a4..5a0a915 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -143,7 +143,6 @@ "title": "Reference genome options", "type": "object", "fa_icon": "fas fa-dna", - "required": ["fasta"], "description": "Reference genome related files and options required for the workflow.", "properties": { "genome": { diff --git a/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf b/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf index d04f860..a1e5d89 100644 --- a/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf @@ -1,5 +1,5 @@ // -// Subworkflow with functionality specific to the nf-core/pipeline pipeline +// Subworkflow with functionality specific to the nf-core/createpanelrefs pipeline // /* diff --git a/tests/pipeline/germlinecnvcaller_cohort.nf.test b/tests/pipeline/germlinecnvcaller_cohort.nf.test index ab282c8..ecd7833 100644 --- a/tests/pipeline/germlinecnvcaller_cohort.nf.test +++ b/tests/pipeline/germlinecnvcaller_cohort.nf.test @@ -4,7 +4,7 @@ nextflow_workflow { script "subworkflows/local/germlinecnvcaller_cohort.nf" workflow "GERMLINECNVCALLER_COHORT" tag "germlinecnvcaller" - config "./germlinecnvcaller_cohort.config" 
+ config "tests/pipeline/germlinecnvcaller_cohort.config" test("Run germlinecnvcaller test") { From 886f8d3ade109b4f7741609e1d89d0db3c646a5d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 21 Feb 2024 15:04:50 +0100 Subject: [PATCH 077/234] update test --- tests/pipeline/germlinecnvcaller_cohort.nf.test | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/pipeline/germlinecnvcaller_cohort.nf.test b/tests/pipeline/germlinecnvcaller_cohort.nf.test index ecd7833..1239160 100644 --- a/tests/pipeline/germlinecnvcaller_cohort.nf.test +++ b/tests/pipeline/germlinecnvcaller_cohort.nf.test @@ -25,6 +25,10 @@ nextflow_workflow { [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"),[]], [[ id:'test2' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam"),[]]) input[4] = Channel.fromPath(params.ploidy_priors) + input[5] = Channel.value([[:],[]]) + input[6] = Channel.value([[:],[]]) + input[7] = Channel.value([[:],[]]) + input[8] = Channel.value([[:],[]]) """ } } From cdec40087aee00b007cb45bf07c536e1251f2ce9 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 21 Feb 2024 15:26:13 +0100 Subject: [PATCH 078/234] update usage --- docs/usage.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index c2ac6a6..97d9d07 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -111,10 +111,18 @@ If you wish to share such profile (such as upload as supplementary material for If you are running the pipeline to generate references for the GATK's germlinecnvcalling workflow, you should ensure that you have provided all the mandatory options specified in the table below. 
-| Mandatory | Optional | -| ------------------------- | -------- | -| fasta/genomes | fai | -| ploidy_priors<sup>1</sup> | dict | +| Mandatory | Optional | +| ------------------------- | --------------------------------- | +| fasta/genomes | fai | +| ploidy_priors<sup>1</sup> | dict | +| | target_bed/target_interval_list | +| | exclude_bed/exclude_interval_list | +| | bin_length | +| | mappable_regions | +| | padding | +| | readcount_format | +| | scatter_content | +| | segmental_duplications | <sup>1</sup> To learn more about this file, see [this comment](https://gatk.broadinstitute.org/hc/en-us/community/posts/360074399831/comments/13441240230299) on GATK forum.
From a8644760cc027db8504fb5ee1b649d09c696627a Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 27 Mar 2024 14:00:40 +0100 Subject: [PATCH 079/234] fix lint --- .github/ISSUE_TEMPLATE/bug_report.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 6f0acb3..3519546 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,7 +42,7 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 23.10.0)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ From 64e2ee48a77ca9a984bc70e6bf31a0104777edb9 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 28 Mar 2024 16:35:40 +0100 Subject: [PATCH 080/234] fix lint errors --- subworkflows/local/gens_pon.nf | 50 ++++++++++++++-------------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/subworkflows/local/gens_pon.nf b/subworkflows/local/gens_pon.nf index 1bf8e57..326e6be 100644 --- a/subworkflows/local/gens_pon.nf +++ b/subworkflows/local/gens_pon.nf @@ -18,9 +18,9 @@ workflow GENS_PON { // // Prepare references // - SAMTOOLS_FAIDX (ch_fasta, [[:],[]]) + SAMTOOLS_FAIDX ( ch_fasta, [[:],[]] ) - PICARD_CREATESEQUENCEDICTIONARY (ch_fasta) + PICARD_CREATESEQUENCEDICTIONARY ( ch_fasta ) ch_user_dict .mix(PICARD_CREATESEQUENCEDICTIONARY.out.reference_dict) @@ -32,53 +32,45 @@ workflow GENS_PON { .collect() .set { ch_fai } - GATK4_PREPROCESSINTERVALS (ch_fasta, - ch_fai, - ch_dict, - [[:],[]], [[:],[]]) - + GATK4_PREPROCESSINTERVALS ( ch_fasta, ch_fai, ch_dict, [[:],[]], [[:],[]] ) // // Filter out files that lack indices, and generate them // ch_input - .branch 
{ meta, alignment, index -> - alignment_with_index: index.size() > 0 - return [meta, alignment, index] - alignment_without_index: index.size() == 0 - return [meta, alignment] - } - .set { ch_for_mix } + .branch { meta, alignment, index -> + alignment_with_index: index.size() > 0 + return [meta, alignment, index] + alignment_without_index: index.size() == 0 + return [meta, alignment] + } + .set { ch_for_mix } - SAMTOOLS_INDEX (ch_for_mix.alignment_without_index) + SAMTOOLS_INDEX ( ch_for_mix.alignment_without_index ) SAMTOOLS_INDEX.out.bai - .mix(SAMTOOLS_INDEX.out.crai) - .set { ch_index } + .mix(SAMTOOLS_INDEX.out.crai) + .set { ch_index } // // Collect alignment files and their indices // ch_for_mix.alignment_without_index - .join(ch_index) - .mix(ch_for_mix.alignment_with_index) - .combine(GATK4_PREPROCESSINTERVALS.out.interval_list.map{it -> it[1]}) - .set {ch_readcounts_in} + .join(ch_index) + .mix(ch_for_mix.alignment_with_index) + .combine(GATK4_PREPROCESSINTERVALS.out.interval_list.map{it -> it[1]}) + .set {ch_readcounts_in} // // Collect read counts, and generate models // - GATK4_COLLECTREADCOUNTS (ch_readcounts_in, - ch_fasta, - ch_fai, - ch_dict) + GATK4_COLLECTREADCOUNTS ( ch_readcounts_in, ch_fasta, ch_fai, ch_dict ) GATK4_COLLECTREADCOUNTS.out.tsv - .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) - .set { ch_readcounts_out } - + .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) + .set { ch_readcounts_out } - GATK4_CREATEREADCOUNTPANELOFNORMALS (ch_readcounts_out) + GATK4_CREATEREADCOUNTPANELOFNORMALS ( ch_readcounts_out ) ch_versions = ch_versions.mix(GATK4_COLLECTREADCOUNTS.out.versions.first()) ch_versions = ch_versions.mix(GATK4_CREATEREADCOUNTPANELOFNORMALS.out.versions.first()) From c93b543d6ee31e82ee274f3a148031f69be2bbfc Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 28 Mar 2024 16:48:33 +0100 Subject: [PATCH 081/234] fix lint --- main.nf | 17 ++++++++++------- nextflow.config | 2 -- 2 files changed, 
10 insertions(+), 9 deletions(-) diff --git a/main.nf b/main.nf index 71b8149..2dfd69b 100644 --- a/main.nf +++ b/main.nf @@ -30,13 +30,16 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_crea // This is an example of how to use getGenomeAttribute() to fetch parameters // from igenomes.config using `--genome` -params.fasta = getGenomeAttribute('fasta') -params.fai = getGenomeAttribute('fai') -params.dict = getGenomeAttribute('dict') -params.target_bed = getGenomeAttribute('target_bed') -params.target_interval_list = getGenomeAttribute('target_interval_list') -params.exclude_bed = getGenomeAttribute('exclude_bed') -params.exclude_interval_list = getGenomeAttribute('exclude_interval_list') +params.fasta = getGenomeAttribute('fasta') +params.fai = getGenomeAttribute('fai') +params.dict = getGenomeAttribute('dict') +params.gcnv_exclude_bed = getGenomeAttribute('gcnv_exclude_bed') +params.gcnv_exclude_interval_list = getGenomeAttribute('gcnv_exclude_interval_list') +params.gcnv_mappable_regions = getGenomeAttribute('gcnv_mappable_regions') +params.gcnv_target_bed = getGenomeAttribute('gcnv_target_bed') +params.gcnv_target_interval_list = getGenomeAttribute('gcnv_target_interval_list') +params.gcnv_ploidy_priors = getGenomeAttribute('gcnv_ploidy_priors') +params.gcnv_segmental_duplications = getGenomeAttribute('gcnv_segmental_duplications') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/nextflow.config b/nextflow.config index f685b59..7f0a4d1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -25,9 +25,7 @@ params { // Germlinecnvcaller options gcnv_analysis_type = 'wgs' gcnv_bin_length = 1000 - gcnv_mappable_regions = null gcnv_padding = 0 - gcnv_ploidy_priors = null gcnv_readcount_format = 'HDF5' gcnv_scatter_content = 5000 gcnv_segmental_duplications = null From 6b9501eb4692559546cf010cd80f92f50b917567 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj 
<20065894+ramprasadn@users.noreply.github.com> Date: Thu, 28 Mar 2024 17:45:42 +0100 Subject: [PATCH 082/234] add tests --- conf/modules/gens_pon.config | 1 - subworkflows/local/gens_pon.nf | 4 ++ tests/config/tags.yml | 7 +++ tests/pipeline/gens_pon.nf.test | 49 +++++++++++++++++++ tests/test_assets/gens_software_versions.yaml | 12 +++++ 5 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 tests/pipeline/gens_pon.nf.test create mode 100644 tests/test_assets/gens_software_versions.yaml diff --git a/conf/modules/gens_pon.config b/conf/modules/gens_pon.config index dea78a2..ee473b1 100644 --- a/conf/modules/gens_pon.config +++ b/conf/modules/gens_pon.config @@ -57,7 +57,6 @@ process { publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/gens_pon/createreadcountpanelofnormals" }, - pattern: "*-model", saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } diff --git a/subworkflows/local/gens_pon.nf b/subworkflows/local/gens_pon.nf index 326e6be..38c9727 100644 --- a/subworkflows/local/gens_pon.nf +++ b/subworkflows/local/gens_pon.nf @@ -68,6 +68,10 @@ workflow GENS_PON { GATK4_COLLECTREADCOUNTS.out.tsv .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) + .collect { it[1] } + .map { it -> + return [[id:"gens_pon"], it] + } .set { ch_readcounts_out } GATK4_CREATEREADCOUNTPANELOFNORMALS ( ch_readcounts_out ) diff --git a/tests/config/tags.yml b/tests/config/tags.yml index dd415e2..3672b33 100644 --- a/tests/config/tags.yml +++ b/tests/config/tags.yml @@ -18,3 +18,10 @@ germlinecnvcaller_cohort: - subworkflows/local/germlinecnvcaller_cohort.nf - tests/pipeline/germlinecnvcaller_cohort.nf.test - tests/pipeline/germlinecnvcaller_cohort.nf.config + - tests/test_assets/germlinecnvcaller_software_versions.yaml + +gens_pon: + - conf/modules/gens_pon.config + - subworkflows/local/gens_pon.nf + - tests/pipeline/gens_pon.nf.test + - tests/test_assets/gens_software_versions.yaml diff --git a/tests/pipeline/gens_pon.nf.test 
b/tests/pipeline/gens_pon.nf.test new file mode 100644 index 0000000..fec08b2 --- /dev/null +++ b/tests/pipeline/gens_pon.nf.test @@ -0,0 +1,49 @@ +nextflow_workflow { + + name "Test Workflow GENS_PON" + script "subworkflows/local/gens_pon.nf" + workflow "GENS_PON" + tag "gens" + + test("Run gens test") { + + when { + params { + outdir = "$outputDir" + tools = 'gens' + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" + } + workflow { + """ + input[0] = Channel.empty() + input[1] = Channel.empty() + input[2] = Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() + input[3] = Channel.of( + [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"),[]], + [[ id:'test2' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam"),[]]) + """ + } + } + + then { + assert workflow.success + assert workflow.trace.succeeded().size() == 8 + assert workflow.trace.failed().size() == 0 + + assert path("$outputDir/gens_pon/references/genome.dict").toFile().isFile() + assert path("$outputDir/gens_pon/references/genome.fasta.fai").toFile().isFile() + assert path("$outputDir/gens_pon/createreadcountpanelofnormals/gens_pon.hdf5").toFile().isFile() + + def expected = path("$baseDir/tests/test_assets/gens_software_versions.yaml").yaml.collect() + def observed_list = [] + def observed = workflow.out.versions.collect {f -> path(f).yaml.entrySet()} + observed.stream() + .forEach(observed_list::addAll) + + assertContainsInAnyOrder(expected, observed_list) + + } + + } + +} diff --git a/tests/test_assets/gens_software_versions.yaml b/tests/test_assets/gens_software_versions.yaml new file mode 100644 index 0000000..9fec793 --- /dev/null +++ b/tests/test_assets/gens_software_versions.yaml @@ -0,0 +1,12 @@ 
+"GENS_PON:SAMTOOLS_INDEX": + samtools: 1.18 +"GENS_PON:GATK4_COLLECTREADCOUNTS": + gatk4: 4.4.0.0 +"GENS_PON:SAMTOOLS_FAIDX": + samtools: 1.18 +"GENS_PON:GATK4_PREPROCESSINTERVALS": + gatk4: 4.4.0.0 +"GENS_PON:PICARD_CREATESEQUENCEDICTIONARY": + picard: 3.1.1 +"GENS_PON:GATK4_CREATEREADCOUNTPANELOFNORMALS": + gatk4: 4.4.0.0 From ad579b93ca42216c8ee1b299bc4f879270a78536 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 28 Mar 2024 18:05:38 +0100 Subject: [PATCH 083/234] fix defaults --- workflows/createpanelrefs.nf | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index c481dac..b7caa1b 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -15,6 +15,7 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_crea ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +include { GENS_PON } from '../subworkflows/local/gens_pon' include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvcaller_cohort' /* @@ -58,32 +59,6 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.mu ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// - -include { GENS_PON } from '../subworkflows/local/gens_pon' -include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvcaller_cohort' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Installed directly from nf-core/modules -// - -include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 9002e514417be4ea0fc2b822ed1b0c95e72f2782 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 28 Mar 2024 18:41:10 +0100 Subject: [PATCH 084/234] test singularity --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9d8031c..f617509 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,7 +60,7 @@ jobs: # Run tests based on changes in code tags: ["${{ fromJson(needs.changes.outputs.tags) }}"] # Only run docker tests on dev branch - profile: ["docker"] + profile: ["docker", "singularity"] # Only test minimal version NXF_VER: ["23.10.0"] # Always run default test From 
d718b26812da9ad50526e5e28c214e39dacae87f Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 28 Mar 2024 18:47:56 +0100 Subject: [PATCH 085/234] install singularity --- .github/workflows/ci.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f617509..8c8766b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -91,6 +91,12 @@ jobs: - name: Disk space cleanup uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + - name: Set up Singularity + if: matrix.profile == 'singularity' + uses: eWaterCycle/setup-singularity@v5 + with: + singularity-version: 3.7.1 + - name: Run nf-test uses: Wandalen/wretry.action@v1.0.11 with: From cd0fbdc517838e8fd92c241aa0a64a50b9de3908 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 28 Mar 2024 22:59:11 +0100 Subject: [PATCH 086/234] remove disk cleanup --- .github/workflows/ci.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8c8766b..bbddca7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -88,9 +88,6 @@ jobs: with: version: "${{ matrix.NXF_VER }}" - - name: Disk space cleanup - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - name: Set up Singularity if: matrix.profile == 'singularity' uses: eWaterCycle/setup-singularity@v5 From 29c198972d1b62bcc921adef891c2488b45780e4 Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 29 Mar 2024 22:53:39 +0100 Subject: [PATCH 087/234] add config --- conf/modules/gens_pon.config | 2 +- tests/pipeline/gens_pon.config | 7 +++++++ tests/pipeline/gens_pon.nf.test | 16 +++++++++++----- workflows/createpanelrefs.nf | 13 ++++++++----- 4 files changed, 27 insertions(+), 11 deletions(-) create mode 100644 
tests/pipeline/gens_pon.config diff --git a/conf/modules/gens_pon.config b/conf/modules/gens_pon.config index ee473b1..5ea92b5 100644 --- a/conf/modules/gens_pon.config +++ b/conf/modules/gens_pon.config @@ -43,7 +43,7 @@ process { } withName: '.*GENS_PON:GATK4_COLLECTREADCOUNTS' { - ext.args = {"--format ${params.readcount_format} --imr OVERLAPPING_ONLY"} + ext.args = {"--format ${params.gens_readcount_format} --imr OVERLAPPING_ONLY"} publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/gens_pon/readcounts" }, diff --git a/tests/pipeline/gens_pon.config b/tests/pipeline/gens_pon.config new file mode 100644 index 0000000..0a48b59 --- /dev/null +++ b/tests/pipeline/gens_pon.config @@ -0,0 +1,7 @@ +process { + + withName: 'GATK4_CREATEREADCOUNTPANELOFNORMALS' { + ext.args = "--minimum-interval-median-percentile 10 --number-of-eigensamples 2" + } + +} diff --git a/tests/pipeline/gens_pon.nf.test b/tests/pipeline/gens_pon.nf.test index fec08b2..2e06852 100644 --- a/tests/pipeline/gens_pon.nf.test +++ b/tests/pipeline/gens_pon.nf.test @@ -4,19 +4,25 @@ nextflow_workflow { script "subworkflows/local/gens_pon.nf" workflow "GENS_PON" tag "gens" + config "tests/pipeline/gens_pon.config" test("Run gens test") { when { params { - outdir = "$outputDir" - tools = 'gens' - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" + outdir = "$outputDir" + gens_readcount_format = "TSV" + gens_bin_length = 100 + tools = 'gens' + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" + fai = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta.fai" + dict = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.dict" + } workflow { """ - input[0] = Channel.empty() - input[1] = Channel.empty() + input[0] = 
Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() + input[1] = Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() input[2] = Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() input[3] = Channel.of( [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"),[]], diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index b7caa1b..8a76ba9 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -114,10 +114,17 @@ workflow CREATEPANELREFS { if (params.tools && params.tools.split(',').contains('gens')) { + ch_samplesheet + .map{meta, bam, bai, cram, crai -> + if (bam) return [ meta + [data_type:"bam"], bam, bai ] + if (cram) return [ meta + [data_type:"cram"], cram, crai ] + } + .set { ch_gens_input } + GENS_PON(ch_dict, ch_fai, ch_fasta, - ch_input) + ch_gens_input) ch_versions = ch_versions.mix(GENS_PON.out.versions) } @@ -132,12 +139,8 @@ workflow CREATEPANELREFS { // // MODULE: MultiQC // - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) From b69f503cd21753e51c95fc15ac67e1c4169e8a60 Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 29 Mar 2024 22:55:32 +0100 Subject: [PATCH 088/234] update ci --- .github/workflows/ci.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bbddca7..9d8031c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,7 +60,7 @@ jobs: # Run tests based on changes in code tags: ["${{ fromJson(needs.changes.outputs.tags) }}"] # Only run docker tests on dev branch - profile: ["docker", "singularity"] + profile: ["docker"] # Only test minimal version NXF_VER: ["23.10.0"] # Always run default test @@ -88,11 +88,8 @@ jobs: with: version: "${{ matrix.NXF_VER }}" - - name: Set up Singularity - if: matrix.profile == 'singularity' - uses: eWaterCycle/setup-singularity@v5 - with: - singularity-version: 3.7.1 + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - name: Run nf-test uses: Wandalen/wretry.action@v1.0.11 From 12d3ab6fa4406c64d4f0ce4d537d04d2ae36ce1e Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 30 Mar 2024 20:57:23 +0100 Subject: [PATCH 089/234] update test --- tests/pipeline/gens_pon.config | 14 ++++++++++++++ tests/pipeline/gens_pon.nf.test | 8 +++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tests/pipeline/gens_pon.config b/tests/pipeline/gens_pon.config index 0a48b59..c026504 
100644 --- a/tests/pipeline/gens_pon.config +++ b/tests/pipeline/gens_pon.config @@ -5,3 +5,17 @@ process { } } + + +profiles { + docker { + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u root' + } +} diff --git a/tests/pipeline/gens_pon.nf.test b/tests/pipeline/gens_pon.nf.test index 2e06852..b4ff563 100644 --- a/tests/pipeline/gens_pon.nf.test +++ b/tests/pipeline/gens_pon.nf.test @@ -4,7 +4,7 @@ nextflow_workflow { script "subworkflows/local/gens_pon.nf" workflow "GENS_PON" tag "gens" - config "tests/pipeline/gens_pon.config" + config "./gens_pon.config" test("Run gens test") { @@ -15,14 +15,12 @@ nextflow_workflow { gens_bin_length = 100 tools = 'gens' fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" - fai = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta.fai" - dict = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.dict" } workflow { """ - input[0] = Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() - input[1] = Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() + input[0] = Channel.empty() + input[1] = Channel.empty() input[2] = Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() input[3] = Channel.of( [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"),[]], From 027ba23d550c72c16f177194c8a6ff5e9bb5e67c Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 30 Mar 2024 23:24:14 +0100 Subject: [PATCH 090/234] format --- docs/usage.md | 10 ++++++---- 1 file changed, 6 
insertions(+), 4 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 41b687a..aa872d0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -129,10 +129,12 @@ process { If you are running the pipeline to generate references for the gens workflow, you should ensure that you have provided all the mandatory options specified in the table below. -| Mandatory | Optional | -| ------------- | -------- | -| fasta/genomes | fai | -| | dict | +| Mandatory | Optional | +| ------------- | -------------------- | +| fasta/genomes | fai | +| | dict | +| | gens_bin_length | +| | gens_readcount_format| ### germlinecnvcaller From 9674a9f0b326e2a8debc86379932d30f34f5e879 Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 30 Mar 2024 23:24:39 +0100 Subject: [PATCH 091/234] typo --- docs/usage.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index aa872d0..e84c726 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -129,12 +129,12 @@ process { If you are running the pipeline to generate references for the gens workflow, you should ensure that you have provided all the mandatory options specified in the table below. 
-| Mandatory | Optional | -| ------------- | -------------------- | -| fasta/genomes | fai | -| | dict | -| | gens_bin_length | -| | gens_readcount_format| +| Mandatory | Optional | +| ------------- | --------------------- | +| fasta/genomes | fai | +| | dict | +| | gens_bin_length | +| | gens_readcount_format | ### germlinecnvcaller From caf4c90cb9d8f4b3bf282dd20a6a4599452ab40f Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 14:10:04 +0200 Subject: [PATCH 092/234] update bam_create_som_pon_gatk subworkflow --- modules.json | 51 ++---- .../environment.yml | 7 + .../gatk4/createsomaticpanelofnormals/main.nf | 9 +- .../createsomaticpanelofnormals/meta.yml | 7 +- .../gatk4/genomicsdbimport/environment.yml | 7 + .../nf-core/gatk4/genomicsdbimport/main.nf | 9 +- .../nf-core/gatk4/genomicsdbimport/meta.yml | 22 +-- .../gatk4/genomicsdbimport/tests/main.nf.test | 155 ++++++++++++++++++ .../genomicsdbimport/tests/main.nf.test.snap | 40 +++++ .../genomicsdbimport/tests/nextflow.config | 2 + .../gatk4/genomicsdbimport/tests/tags.yml | 3 + modules/nf-core/gatk4/mutect2/environment.yml | 7 + modules/nf-core/gatk4/mutect2/main.nf | 9 +- modules/nf-core/gatk4/mutect2/meta.yml | 10 +- .../nf-core/bam_create_som_pon_gatk/meta.yml | 4 +- .../tests/main.nf.test | 48 ++++++ .../tests/main.nf.test.snap | 52 ++++++ .../tests/nextflow.config | 5 + .../bam_create_som_pon_gatk/tests/tags.yml | 2 + 19 files changed, 378 insertions(+), 71 deletions(-) create mode 100644 modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml create mode 100644 modules/nf-core/gatk4/genomicsdbimport/environment.yml create mode 100644 modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test create mode 100644 modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap create mode 100644 modules/nf-core/gatk4/genomicsdbimport/tests/nextflow.config create mode 100644 modules/nf-core/gatk4/genomicsdbimport/tests/tags.yml create mode 100644 
modules/nf-core/gatk4/mutect2/environment.yml create mode 100644 subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test create mode 100644 subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test.snap create mode 100644 subworkflows/nf-core/bam_create_som_pon_gatk/tests/nextflow.config create mode 100644 subworkflows/nf-core/bam_create_som_pon_gatk/tests/tags.yml diff --git a/modules.json b/modules.json index 582b9e5..57887c1 100644 --- a/modules.json +++ b/modules.json @@ -30,6 +30,11 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "gatk4/createsomaticpanelofnormals": { + "branch": "master", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", + "installed_by": ["bam_create_som_pon_gatk"] + }, "gatk4/determinegermlinecontigploidy": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", @@ -40,35 +45,30 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, - "gatk4/germlinecnvcaller": { + "gatk4/genomicsdbimport": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["bam_create_som_pon_gatk"] }, - "gatk4/intervallisttools": { + "gatk4/germlinecnvcaller": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, - "gatk4/preprocessintervals": { + "gatk4/intervallisttools": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, - "gatk4/createsomaticpanelofnormals": { - "branch": "master", - "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": ["bam_create_som_pon_gatk"] - }, - "gatk4/genomicsdbimport": { + "gatk4/mutect2": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": 
["bam_create_som_pon_gatk"] }, - "gatk4/mutect2": { + "gatk4/preprocessintervals": { "branch": "master", - "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", - "installed_by": ["bam_create_som_pon_gatk"] + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] }, "multiqc": { "branch": "master", @@ -92,30 +92,11 @@ } } }, - "subworkflows": { - "nf-core": { - "utils_nextflow_pipeline": { - "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] - }, - "utils_nfcore_pipeline": { - "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] - }, - "utils_nfvalidation_plugin": { - "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] - } - } - }, "subworkflows": { "nf-core": { "bam_create_som_pon_gatk": { "branch": "master", - "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["subworkflows"] } } diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml b/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml new file mode 100644 index 0000000..ae543c6 --- /dev/null +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_createsomaticpanelofnormals +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf b/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf index e5557c7..27a50dc 100644 --- a/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf @@ -2,10 +2,10 @@ process GATK4_CREATESOMATICPANELOFNORMALS { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(genomicsdb) @@ -32,7 +32,8 @@ process GATK4_CREATESOMATICPANELOFNORMALS { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" CreateSomaticPanelOfNormals \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CreateSomaticPanelOfNormals \\ --variant gendb://$genomicsdb \\ --output ${prefix}.vcf.gz \\ --reference $fasta \\ diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml b/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml index 2f49cf2..9c3ee19 100644 --- a/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml @@ -1,8 +1,8 @@ name: gatk4_createsomaticpanelofnormals description: Create a panel of normals contraining germline and artifactual sites for use with mutect2. 
keywords: - - gatk4 - createsomaticpanelofnormals + - gatk4 - panelofnormals tools: - gatk4: @@ -13,7 +13,6 @@ tools: homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 - input: - meta: type: map @@ -51,7 +50,6 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" - output: - vcf: type: file @@ -65,6 +63,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/genomicsdbimport/environment.yml b/modules/nf-core/gatk4/genomicsdbimport/environment.yml new file mode 100644 index 0000000..a3a1363 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_genomicsdbimport +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/genomicsdbimport/main.nf b/modules/nf-core/gatk4/genomicsdbimport/main.nf index dc77345..6f1d4c5 100644 --- a/modules/nf-core/gatk4/genomicsdbimport/main.nf +++ b/modules/nf-core/gatk4/genomicsdbimport/main.nf @@ -2,10 +2,10 @@ process GATK4_GENOMICSDBIMPORT { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf), path(tbi), path(interval_file), val(interval_value), path(wspace) @@ -53,7 +53,8 @@ process GATK4_GENOMICSDBIMPORT { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" GenomicsDBImport \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + GenomicsDBImport \\ $input_command \\ $genomicsdb_command \\ $interval_command \\ diff --git a/modules/nf-core/gatk4/genomicsdbimport/meta.yml b/modules/nf-core/gatk4/genomicsdbimport/meta.yml index af626cb..11e565b 100644 --- a/modules/nf-core/gatk4/genomicsdbimport/meta.yml +++ b/modules/nf-core/gatk4/genomicsdbimport/meta.yml @@ -2,10 +2,10 @@ name: gatk4_genomicsdbimport description: merge GVCFs from multiple samples. For use in joint genotyping or somatic panel of normal creation. keywords: - gatk4 - - genomicsdbimport - genomicsdb - - panelofnormalscreation + - genomicsdbimport - jointgenotyping + - panelofnormalscreation tools: - gatk4: description: | @@ -15,7 +15,6 @@ tools: homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 - input: - meta: type: map @@ -26,42 +25,34 @@ input: type: list description: either a list of vcf files to be used to create or update a genomicsdb, or a file that contains a map to vcf files to be used. pattern: "*.vcf.gz" - - tbi: type: list description: list of tbi files that match with the input vcf files pattern: "*.vcf.gz_tbi" - - wspace: - type: path + type: file description: path to an existing genomicsdb to be used in update db mode or get intervals mode. This WILL NOT specify name of a new genomicsdb in create db mode. 
pattern: "/path/to/existing/gendb" - - - intervalfile: + - interval_file: type: file description: file containing the intervals to be used when creating the genomicsdb pattern: "*.interval_list" - - - intervalval: + - interval_value: type: string description: if an intervals file has not been spcified, the value enetered here will be used as an interval via the "-L" argument pattern: "example: chr1:1000-10000" - - run_intlist: type: boolean description: Specify whether to run get interval list mode, this option cannot be specified at the same time as run_updatewspace. pattern: "true/false" - - run_updatewspace: type: boolean description: Specify whether to run update genomicsdb mode, this option takes priority over run_intlist. pattern: "true/false" - - input_map: type: boolean description: Specify whether the vcf input is providing a list of vcf file(s) or a single file containing a map of paths to vcf files to be used to create or update a genomicsdb. pattern: "*.sample_map" - output: - genomicsdb: type: directory @@ -79,6 +70,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test new file mode 100644 index 0000000..9c207b3 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test @@ -0,0 +1,155 @@ +nextflow_process { + + name "Test Process GATK4_GENOMICSDBIMPORT" + script "../main.nf" + process "GATK4_GENOMICSDBIMPORT" + + tag "modules" + tag "modules_nfcore" + tag "untar" + tag "gatk4" + tag "gatk4/genomicsdbimport" + + test("test_gatk4_genomicsdbimport_create_genomicsdb") { + + when { + process { + """ + // [meta, vcf, tbi, interval, interval_value, workspace ] + input[0] = [ [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) , 
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true) , + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.interval_list', checkIfExists: true) , + [] , + [] ] + // run_intlist + input[1] = false + // run_updatewspace + input[2] = false + // input_map + input[3] = false + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.genomicsdb.get(0).get(1)).list().sort()).match() } + //{ assert snapshot(file(process.out.updatedb.get(0).get(1)).list().sort()).match() } + //{ assert snapshot(process.out.intervallist.get(0).get(1)).match() } + ) + } + + } + + test("test_gatk4_genomicsdbimport_get_intervalslist") { + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = Channel.of([ [], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz', checkIfExists: true) ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ [id:"test"], [], [], [], []]).combine(UNTAR.out.untar.map{ it[1] }) + // run_intlist + input[1] = true + // run_updatewspace + input[2] = false + // input_map + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + //{ assert snapshot(file(process.out.genomicsdb.get(0).get(1)).list().sort()).match() } + //{ assert snapshot(file(process.out.updatedb.get(0).get(1)).list().sort()).match() } + { assert snapshot(process.out.intervallist.get(0).get(1)).match() } + ) + } + + } + + test("test_gatk4_genomicsdbimport_update_genomicsdb") { + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = Channel.of([ [], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz', checkIfExists: true) ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ [id:"test"], file( 
params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz' , checkIfExists: true), file( params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi' , checkIfExists: true), [], []]).combine(UNTAR.out.untar.map{ it[1] }) + // run_intlist + input[1] = false + // run_updatewspace + input[2] = true + // input_map + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + //{ assert snapshot(file(process.out.genomicsdb.get(0).get(1)).list().sort()).match() } + { assert snapshot(file(process.out.updatedb.get(0).get(1)).list().sort()).match() } + //{ assert snapshot(process.out.intervallist.get(0).get(1)).match() } + ) + } + + } + + test("test_gatk4_genomicsdbimport_stub") { + + options "-stub" + + when { + process { + """ + // [meta, vcf, tbi, interval, interval_value, workspace ] + input[0] = [ [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) , + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true) , + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.interval_list', checkIfExists: true) , + [] , + [] ] + // run_intlist + input[1] = false + // run_updatewspace + input[2] = false + // input_map + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap new file mode 100644 index 0000000..a633bbd --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap @@ -0,0 +1,40 @@ +{ + "test_gatk4_genomicsdbimport_get_intervalslist": { + "content": [ + "test.interval_list:md5,4c85812ac15fc1cd29711a851d23c0bf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": 
"2024-02-28T17:55:03.846241" + }, + "test_gatk4_genomicsdbimport_create_genomicsdb": { + "content": [ + "__tiledb_workspace.tdb", + "callset.json", + "chr22$1$40001", + "vcfheader.vcf", + "vidmap.json" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T11:22:10.11423157" + }, + "test_gatk4_genomicsdbimport_update_genomicsdb": { + "content": [ + "__tiledb_workspace.tdb", + "callset.json", + "chr22$1$40001", + "vcfheader.vcf", + "vidmap.json" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T12:46:42.403794676" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/genomicsdbimport/tests/nextflow.config b/modules/nf-core/gatk4/genomicsdbimport/tests/nextflow.config new file mode 100644 index 0000000..e177a14 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/tests/nextflow.config @@ -0,0 +1,2 @@ +process { +} diff --git a/modules/nf-core/gatk4/genomicsdbimport/tests/tags.yml b/modules/nf-core/gatk4/genomicsdbimport/tests/tags.yml new file mode 100644 index 0000000..8a00857 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/tests/tags.yml @@ -0,0 +1,3 @@ +gatk4/genomicsdbimport: + - "modules/nf-core/gatk4/genomicsdbimport/**" + - "modules/nf-core/untar/**" diff --git a/modules/nf-core/gatk4/mutect2/environment.yml b/modules/nf-core/gatk4/mutect2/environment.yml new file mode 100644 index 0000000..86f4bfa --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_mutect2 +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/mutect2/main.nf b/modules/nf-core/gatk4/mutect2/main.nf index bddc368..79d8d28 100644 --- a/modules/nf-core/gatk4/mutect2/main.nf +++ b/modules/nf-core/gatk4/mutect2/main.nf @@ -2,10 +2,10 @@ process GATK4_MUTECT2 { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda 
"${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(input), path(input_index), path(intervals) @@ -42,7 +42,8 @@ process GATK4_MUTECT2 { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" Mutect2 \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + Mutect2 \\ $inputs \\ --output ${prefix}.vcf.gz \\ --reference $fasta \\ diff --git a/modules/nf-core/gatk4/mutect2/meta.yml b/modules/nf-core/gatk4/mutect2/meta.yml index 4842c22..21c928e 100644 --- a/modules/nf-core/gatk4/mutect2/meta.yml +++ b/modules/nf-core/gatk4/mutect2/meta.yml @@ -2,8 +2,10 @@ name: gatk4_mutect2 description: Call somatic SNVs and indels via local assembly of haplotypes. keywords: - gatk4 - - mutect2 - haplotype + - indels + - mutect2 + - snvs - somatic tools: - gatk4: @@ -15,7 +17,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -77,7 +78,6 @@ input: type: file description: Index for the panel of normals. 
pattern: "*.vcf.gz.tbi" - output: - vcf: type: file @@ -99,7 +99,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@GCJMackenzie" - "@ramprasadn" +maintainers: + - "@GCJMackenzie" + - "@ramprasadn" diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml b/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml index e682f7e..2660836 100644 --- a/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml @@ -9,7 +9,7 @@ keywords: - variant_calling - genomicsdb_workspace - panel_of_normals -modules: +components: - gatk4/mutect2 - gatk4/genomicsdbimport - gatk4/createsomaticpanelofnormals @@ -65,3 +65,5 @@ output: pattern: "*vcf.gz.tbi" authors: - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test new file mode 100644 index 0000000..3efffd4 --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test @@ -0,0 +1,48 @@ +nextflow_workflow { + + name "Test Subworkflow BAM_CREATE_SOM_PON_GATK" + script "../main.nf" + config "./nextflow.config" + + workflow "BAM_CREATE_SOM_PON_GATK" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/bam_create_som_pon_gatk" + tag "gatk4" + tag "gatk4/mutect2" + tag "gatk4/genomicsdbimport" + tag "gatk4/createsomaticpanelofnormals" + + test("test_create_som_pon_gatk_bam") { + when { + workflow { + """ + // ch_mutect2_in + input[0] = Channel.of([[ id:'test1' ], file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true),[]],[[ id:'test2' ], file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true), 
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true), []]) + // ch_fasta + input[1] = Channel.value([ [ id:'genome' ], file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)]) + // ch_fai + input[2] = Channel.value([ [ id:'genome' ], file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)]) + // ch_dict + input[3] = Channel.value([ [ id:'genome' ], file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists:true)]) + // str_pon_norm + input[4] = "test_panel" + // ch_interval_file + input[5] = Channel.value(file(params.test_data['homo_sapiens']['genome']['genome_21_interval_list'], checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(file(workflow.out.mutect2_vcf.get(0).get(1)).name).match("test1.vcf.gz") }, + { assert snapshot(file(workflow.out.mutect2_index.get(0).get(1)).name).match("test1.vcf.gz.tbi") }, + { assert snapshot(file(workflow.out.mutect2_stats.get(0).get(1)).name).match("test1.vcf.gz.stats") }, + { assert snapshot(file(workflow.out.pon_vcf.get(0).get(1)).name).match("test_panel.vcf.gz") }, + { assert snapshot(file(workflow.out.pon_index.get(0).get(1)).name).match("test_panel.vcf.gz.tbi") }, + ) + } + } +} diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test.snap b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test.snap new file mode 100644 index 0000000..4c0d88a --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test.snap @@ -0,0 +1,52 @@ +{ + "test_panel.vcf.gz": { + "content": [ + "test_panel.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T06:59:54.103667303" + }, + "test1.vcf.gz.stats": { + "content": [ + "test1.vcf.gz.stats" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-02-14T06:59:54.102164313" + }, + "test_panel.vcf.gz.tbi": { + "content": [ + "test_panel.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T06:59:54.105382853" + }, + "test1.vcf.gz": { + "content": [ + "test1.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T06:59:54.098085724" + }, + "test1.vcf.gz.tbi": { + "content": [ + "test1.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T06:59:54.100765684" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/nextflow.config b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/nextflow.config new file mode 100644 index 0000000..6a98618 --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GATK4_MUTECT2 { + ext.args = "--max-mnp-distance 0" + } +} diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/tags.yml b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/tags.yml new file mode 100644 index 0000000..bb1b93c --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_create_som_pon_gatk: + - subworkflows/nf-core/bam_create_som_pon_gatk/** From 6cfbc3c2226257299b7594a545f0c191e19de5c8 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 14:13:03 +0200 Subject: [PATCH 093/234] update and retore utils subworkflows --- modules.json | 15 +++++++++++++++ .../utils_nextflow_pipeline/tests/nextflow.config | 2 +- .../utils_nfcore_pipeline/tests/nextflow.config | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/modules.json b/modules.json index 57887c1..b4c5bb7 100644 --- a/modules.json +++ b/modules.json @@ -98,6 +98,21 @@ "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["subworkflows"] + }, + 
"utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] } } } diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config index 0fa4aba..d0a926b 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -3,7 +3,7 @@ manifest { author = """nf-core""" homePage = 'https://127.0.0.1' description = """Dummy pipeline""" - nextflowVersion = '!>=23.10.0' + nextflowVersion = '!>=23.04.0' version = '9.9.9' doi = 'https://doi.org/10.5281/zenodo.5070524' } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config index 0fa4aba..d0a926b 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -3,7 +3,7 @@ manifest { author = """nf-core""" homePage = 'https://127.0.0.1' description = """Dummy pipeline""" - nextflowVersion = '!>=23.10.0' + nextflowVersion = '!>=23.04.0' version = '9.9.9' doi = 'https://doi.org/10.5281/zenodo.5070524' } From 2c0789ae945505f396cd0c42918af815401208a8 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 14:15:37 +0200 Subject: [PATCH 094/234] code polish --- workflows/createpanelrefs.nf | 38 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 3229777..9903875 100644 --- 
a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -27,25 +27,26 @@ include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvc include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' +// Initialize file channels based on params, defined in the params.genomes[params.genome] scope +ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName],targets]}.collect() + : Channel.value([[:],[]]) +ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() + : Channel.empty() +ch_exclude_bed = params.exclude_bed ? Channel.fromPath(params.exclude_bed).map { exclude -> [[id:exclude.baseName],exclude]}.collect() + : Channel.value([[:],[]]) +ch_exclude_interval_list = params.exclude_interval_list ? Channel.fromPath(params.exclude_interval_list).map { exclude -> [[id:exclude.baseName],exclude]}.collect() + : Channel.value([[:],[]]) +ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() + : Channel.empty() +ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() + : Channel.empty() +ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() + : Channel.empty() +ch_target_bed = params.target_bed ? Channel.fromPath(params.target_bed).map { targets -> [[id:targets.baseName],targets]}.collect() + : Channel.value([[:],[]]) +ch_target_interval_list = params.target_interval_list ? Channel.fromPath(params.target_interval_list).map { targets -> [[id:targets.baseName],targets]}.collect() + : Channel.value([[:],[]]) -ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) -ch_dict = params.dict ? 
Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() - : Channel.empty() -ch_exclude_bed = params.exclude_bed ? Channel.fromPath(params.exclude_bed).map { exclude -> [[id:exclude.baseName],exclude]}.collect() - : Channel.value([[:],[]]) -ch_exclude_interval_list = params.exclude_interval_list ? Channel.fromPath(params.exclude_interval_list).map { exclude -> [[id:exclude.baseName],exclude]}.collect() - : Channel.value([[:],[]]) -ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() - : Channel.empty() -ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() - : Channel.empty() -ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() - : Channel.empty() -ch_target_bed = params.target_bed ? Channel.fromPath(params.target_bed).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) -ch_target_interval_list = params.target_interval_list ? Channel.fromPath(params.target_interval_list).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES @@ -57,7 +58,6 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.mu ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW From a0c8de730b2c89a6d5a6b1aa7ad1d3c702155d47 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 14:30:02 +0200 Subject: [PATCH 095/234] update schema --- nextflow_schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 1a2daee..6a96e6e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -198,8 +198,8 @@ "type": "string", "fa_icon": "fas fa-toolbox", "description": "Tools to use for building Panel of Normals or models.", - "help_text": "Multiple tools separated with commas.\n\nTools available: CNVKIT,germlinecnvcaller", - "pattern": "^((cnvkit|germlinecnvcaller|gens)?,?)*(? Date: Wed, 3 Apr 2024 14:30:40 +0200 Subject: [PATCH 096/234] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index abde31e..446d7b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#5](https://github.com/nf-core/createpanelrefs/pull/5) - `CNVKIT` can be used to create a PON - [#5](https://github.com/nf-core/createpanelrefs/pull/5) - Usage of nf-validation - [#5](https://github.com/nf-core/createpanelrefs/pull/5) - Usage of nf-test +- [#8](https://github.com/nf-core/createpanelrefs/pull/8) - `Mutect2` can be used to create a PON ### `Fixed` From 2d5c053f28c09333cfbc064126adb645531d34fa Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 16:01:24 +0200 Subject: [PATCH 097/234] add pon_name to schema --- nextflow.config | 216 ++++++++++++++++++----------------- nextflow_schema.json | 14 +++ workflows/createpanelrefs.nf | 58 +++++----- 3 files changed, 154 insertions(+), 134 
deletions(-) diff --git a/nextflow.config b/nextflow.config index 7f0a4d1..8a46860 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,18 +10,22 @@ params { // Input options - input = null + input = null + // References - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes/' - igenomes_ignore = false - fasta = null - fai = null - dict = null + genome = null + igenomes_base = 's3://ngi-igenomes/igenomes/' + igenomes_ignore = false + fasta = null + fai = null + dict = null // Building Panel of Normals and models tools = null // No default, must be specified + // Mutect2 options + pon_name = null + // Germlinecnvcaller options gcnv_analysis_type = 'wgs' gcnv_bin_length = 1000 @@ -45,36 +49,36 @@ params { multiqc_methods_description = null // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false // Config options - config_profile_name = null - config_profile_description = null - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null + config_profile_name = null + config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Max resource options // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' // Schema validation default options - validationFailUnrecognisedParams = false - 
validationLenientMode = false - validationSchemaIgnoreParams = 'genomes,igenomes_base' - validationShowHiddenParams = false - validate_params = true + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes,igenomes_base' + validationShowHiddenParams = false + validate_params = true } @@ -97,95 +101,95 @@ try { // } profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - channels = ['conda-forge', 'bioconda', 'defaults'] - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - docker.runOptions = '-u $(id -u):$(id -g)' + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id 
-u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false } apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - 
charliecloud.enabled = false + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false } gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } @@ -194,10 +198,10 @@ profiles { // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Singularity are enabled // Set to your registry if you have a mirror of containers -apptainer.registry = 'quay.io' -docker.registry = 'quay.io' -podman.registry = 'quay.io' -singularity.registry = 'quay.io' +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' // Nextflow plugins plugins { @@ -215,10 +219,10 @@ if (!params.igenomes_ignore) { // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
env { - PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" - JULIA_DEPOT_PATH = "/usr/local/share/julia" + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" } // Capture exit codes from upstream processes when piping diff --git a/nextflow_schema.json b/nextflow_schema.json index 6a96e6e..4cbdeb3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -142,6 +142,17 @@ } } }, + "mutect2_options": { + "title": "Mutect2 options", + "type": "object", + "description": "Options used by the mutect2 subworkflow", + "default": "", + "properties": { + "pon_name": { + "type": "string" + } + } + }, "input_output_options": { "title": "Input/output options", "type": "object", @@ -458,6 +469,9 @@ { "$ref": "#/definitions/cnvkit_options" }, + { + "$ref": "#/definitions/mutect2_options" + }, { "$ref": "#/definitions/input_output_options" }, diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 9903875..05fd117 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -17,6 +17,7 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_crea include { GENS_PON } from '../subworkflows/local/gens_pon' include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvcaller_cohort' +include { BAM_CREATE_SOM_PON_GATK } from '../subworkflows/nf-core/bam_create_som_pon_gatk' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -28,24 +29,24 @@ include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch/ma include { MULTIQC } from '../modules/nf-core/multiqc/main' // Initialize file channels based on params, defined in the params.genomes[params.genome] scope -ch_cnvkit_targets = params.cnvkit_targets ? 
Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) -ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() +ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName], targets]}.collect() + : Channel.value([[id:'null'], []]) +ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName], dict]}.collect() : Channel.empty() -ch_exclude_bed = params.exclude_bed ? Channel.fromPath(params.exclude_bed).map { exclude -> [[id:exclude.baseName],exclude]}.collect() - : Channel.value([[:],[]]) -ch_exclude_interval_list = params.exclude_interval_list ? Channel.fromPath(params.exclude_interval_list).map { exclude -> [[id:exclude.baseName],exclude]}.collect() - : Channel.value([[:],[]]) -ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() +ch_exclude_bed = params.exclude_bed ? Channel.fromPath(params.exclude_bed).map { exclude -> [[id:exclude.baseName], exclude]}.collect() + : Channel.value([[id:'null'], []]) +ch_exclude_interval_list = params.exclude_interval_list ? Channel.fromPath(params.exclude_interval_list).map { exclude -> [[id:exclude.baseName], exclude]}.collect() + : Channel.value([[id:'null'], []]) +ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName], fai]}.collect() : Channel.empty() -ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() +ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName], fasta]}.collect() : Channel.empty() ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() : Channel.empty() -ch_target_bed = params.target_bed ? 
Channel.fromPath(params.target_bed).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) -ch_target_interval_list = params.target_interval_list ? Channel.fromPath(params.target_interval_list).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) +ch_target_bed = params.target_bed ? Channel.fromPath(params.target_bed).map { targets -> [[id:targets.baseName], targets]}.collect() + : Channel.value([[id:'null'], []]) +ch_target_interval_list = params.target_interval_list ? Channel.fromPath(params.target_interval_list).map { targets -> [[id:targets.baseName], targets]}.collect() + : Channel.value([[id:'null'], []]) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -76,10 +77,7 @@ workflow CREATEPANELREFS { if (params.tools && params.tools.split(',').contains('cnvkit')) { ch_samplesheet - .map{ meta, bam, bai, cram, crai -> - new_meta = meta + [id:"panel"] - [new_meta, bam] - } + .map{ meta, bam, bai, cram, crai -> [meta + [id:'panel'], bam]} .groupTuple() .map {meta, bam -> [ meta, [], bam ]} .set { ch_cnvkit_input } @@ -92,8 +90,8 @@ workflow CREATEPANELREFS { ch_samplesheet .map{meta, bam, bai, cram, crai -> - if (bam) return [ meta + [data_type:"bam"], bam, bai ] - if (cram) return [ meta + [data_type:"cram"], cram, crai ] + if (bam) return [ meta + [data_type:'bam'], bam, bai ] + if (cram) return [ meta + [data_type:'cram'], cram, crai ] } .set { ch_germlinecnvcaller_input } @@ -112,14 +110,18 @@ workflow CREATEPANELREFS { if (params.tools && params.tools.split(',').contains('mutect2')) { - ch_samplesheet - .map{meta, bam, bai, cram, crai -> - if (bam) return [ meta + [data_type:"bam"], bam, bai ] - if (cram) return [ meta + [data_type:"cram"], cram, crai ] - } - .set { ch_mutect2_input } + ch_mutect2_input = ch_samplesheet.map{meta, bam, bai, cram, crai -> + if (bam) return [ meta + [data_type:'bam'], bam, bai ] + if (cram) return [ meta + 
[data_type:'cram'], cram, crai ] + } + + BAM_CREATE_SOM_PON_GATK(ch_mutect2_input, + ch_fasta, + ch_fai, + ch_dict, + params.pon_name, + ch_target_bed.map{ meta, bed -> [ bed ] }) - BAM_CREATE_SOM_PON_GATK ( ch_mutect2_input.map{ meta, cram -> [ meta, bam ]}, ch_fasta, ch_fai, ch_dict, params.pon_name, [] ) ch_versions = ch_versions.mix(BAM_CREATE_SOM_PON_GATK.out.versions) } @@ -128,8 +130,8 @@ workflow CREATEPANELREFS { ch_samplesheet .map{meta, bam, bai, cram, crai -> - if (bam) return [ meta + [data_type:"bam"], bam, bai ] - if (cram) return [ meta + [data_type:"cram"], cram, crai ] + if (bam) return [ meta + [data_type:'bam'], bam, bai ] + if (cram) return [ meta + [data_type:'cram'], cram, crai ] } .set { ch_gens_input } From b934a7e81610f92600d7873246aeab75ed03a0b1 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 16:25:05 +0200 Subject: [PATCH 098/234] fix mutect2 usage --- conf/test.config | 17 ++++++++++------- nextflow.config | 3 ++- tests/csv/1.0.0/bam.csv | 3 +++ tests/csv/1.0.0/cram.csv | 3 --- workflows/createpanelrefs.nf | 2 +- 5 files changed, 16 insertions(+), 12 deletions(-) create mode 100644 tests/csv/1.0.0/bam.csv delete mode 100644 tests/csv/1.0.0/cram.csv diff --git a/conf/test.config b/conf/test.config index 00bbe3b..a044044 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,17 +20,20 @@ params { max_time = '6.h' // Input data - input = "${projectDir}/tests/csv/1.0.0/cram.csv" + input = "${projectDir}/tests/csv/1.0.0/bam.csv" // Main options - tools = 'cnvkit' + tools = 'cnvkit,mutect2' //Germlinecnvcaller options - gcnv_scatter_content = 2 - gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" + gcnv_scatter_content = 2 + gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" // Small reference genome - genome = 
null - igenomes_ignore = true - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" + genome = null + igenomes_ignore = true + dict = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.dict" + fai = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" + target_bed = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" } diff --git a/nextflow.config b/nextflow.config index 8a46860..5ee0d10 100644 --- a/nextflow.config +++ b/nextflow.config @@ -19,9 +19,10 @@ params { fasta = null fai = null dict = null + target_bed = null // Building Panel of Normals and models - tools = null // No default, must be specified + tools = null // No default, must be specified // Mutect2 options pon_name = null diff --git a/tests/csv/1.0.0/bam.csv b/tests/csv/1.0.0/bam.csv new file mode 100644 index 0000000..b95f604 --- /dev/null +++ b/tests/csv/1.0.0/bam.csv @@ -0,0 +1,3 @@ +sample,bam,bai +sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai +sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai diff --git a/tests/csv/1.0.0/cram.csv b/tests/csv/1.0.0/cram.csv deleted 
file mode 100644 index e132a68..0000000 --- a/tests/csv/1.0.0/cram.csv +++ /dev/null @@ -1,3 +0,0 @@ -sample,bam -sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam -sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 05fd117..c260d60 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -115,7 +115,7 @@ workflow CREATEPANELREFS { if (cram) return [ meta + [data_type:'cram'], cram, crai ] } - BAM_CREATE_SOM_PON_GATK(ch_mutect2_input, + BAM_CREATE_SOM_PON_GATK(ch_mutect2_input.map{ meta, reads, index -> [ meta, reads, index, [] ] }, ch_fasta, ch_fai, ch_dict, From 6be5bbb0162ab492c85ed040ae2b069bb12da45d Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 16:28:41 +0200 Subject: [PATCH 099/234] update schema --- nextflow_schema.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index 4cbdeb3..f1924de 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -261,6 +261,15 @@ "fa_icon": "fas fa-file", "exists": true, "mimetype": "text/plain" + }, + "target_bed": { + "type": "string", + "description": "Path to target bed file", + "pattern": "^\\S+\\.bed$", + "format": "file-path", + "fa_icon": "fas fa-file", + "exists": true, + "mimetype": "text/plain" } } }, From 5eaf21ff14eb960d0c62c3f793358edf82b435b8 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 16:48:16 +0200 Subject: [PATCH 100/234] update subworkflow and fix tests --- conf/modules/mutect2.config | 19 ++++ conf/test.config | 2 +- modules.json | 88 ++++++++++++++----- nextflow.config | 1 + .../tests/main.nf.test | 20 +++-- workflows/createpanelrefs.nf | 6 +- 6 files changed, 104 insertions(+), 32 deletions(-) create mode 100644 
conf/modules/mutect2.config diff --git a/conf/modules/mutect2.config b/conf/modules/mutect2.config new file mode 100644 index 0000000..04900ee --- /dev/null +++ b/conf/modules/mutect2.config @@ -0,0 +1,19 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + + withName: GATK4_MUTECT2 { + ext.args = "--max-mnp-distance 0" + } + +} diff --git a/conf/test.config b/conf/test.config index a044044..763db91 100644 --- a/conf/test.config +++ b/conf/test.config @@ -32,8 +32,8 @@ params { // Small reference genome genome = null igenomes_ignore = true + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" dict = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.dict" fai = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" target_bed = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" } diff --git a/modules.json b/modules.json index b4c5bb7..786d019 100644 --- a/modules.json +++ 
b/modules.json @@ -8,87 +8,121 @@ "cnvkit/batch": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/annotateintervals": { "branch": "master", "git_sha": "42ae163c3c6eb23646189c30c07a889ad39c9b0e", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/bedtointervallist": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/collectreadcounts": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/createreadcountpanelofnormals": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/createsomaticpanelofnormals": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["bam_create_som_pon_gatk"] + "installed_by": [ + "bam_create_som_pon_gatk" + ] }, "gatk4/determinegermlinecontigploidy": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/filterintervals": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/genomicsdbimport": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["bam_create_som_pon_gatk"] + "installed_by": [ + "bam_create_som_pon_gatk" + ] }, "gatk4/germlinecnvcaller": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/intervallisttools": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": 
[ + "modules" + ] }, "gatk4/mutect2": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["bam_create_som_pon_gatk"] + "installed_by": [ + "bam_create_som_pon_gatk" + ] }, "gatk4/preprocessintervals": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/createsequencedictionary": { "branch": "master", "git_sha": "20b0918591d4ba20047d7e13e5094bcceba81447", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/faidx": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -96,26 +130,34 @@ "nf-core": { "bam_create_som_pon_gatk": { "branch": "master", - "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": ["subworkflows"] + "git_sha": "b0783b07beb65cac505fa6202e8f670437637b45", + "installed_by": [ + "subworkflows" + ] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 5ee0d10..243473a 
100644 --- a/nextflow.config +++ b/nextflow.config @@ -266,6 +266,7 @@ includeConfig 'conf/modules/base.config' includeConfig 'conf/modules/cnvkit.config' includeConfig 'conf/modules/germlinecnvcaller_cohort.config' includeConfig 'conf/modules/gens_pon.config' +includeConfig 'conf/modules/mutect2.config' // Function to ensure that resource requirements don't go beyond // a maximum limit diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test index 3efffd4..57aee89 100644 --- a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test @@ -19,17 +19,27 @@ nextflow_workflow { workflow { """ // ch_mutect2_in - input[0] = Channel.of([[ id:'test1' ], file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true),[]],[[ id:'test2' ], file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true), []]) + input[0] = Channel.of([ + [ id:'test1' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), + [] ], + [ + [ id:'test2' ], + file(params.modules_testdata_base_path+ 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path+ 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), + [] ] + ) // ch_fasta - 
input[1] = Channel.value([ [ id:'genome' ], file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)]) + input[1] = Channel.value([ [ id:'genome' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)]) // ch_fai - input[2] = Channel.value([ [ id:'genome' ], file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)]) + input[2] = Channel.value([ [ id:'genome' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true)]) // ch_dict - input[3] = Channel.value([ [ id:'genome' ], file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists:true)]) + input[3] = Channel.value([ [ id:'genome' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.dict', checkIfExists:true)]) // str_pon_norm input[4] = "test_panel" // ch_interval_file - input[5] = Channel.value(file(params.test_data['homo_sapiens']['genome']['genome_21_interval_list'], checkIfExists: true)) + input[5] = Channel.value(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', checkIfExists: true)) """ } } diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index c260d60..a5fb940 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -111,11 +111,11 @@ workflow CREATEPANELREFS { if (params.tools && params.tools.split(',').contains('mutect2')) { ch_mutect2_input = ch_samplesheet.map{meta, bam, bai, cram, crai -> - if (bam) return [ meta + [data_type:'bam'], bam, bai ] - if (cram) return [ meta + [data_type:'cram'], cram, crai ] + if (bam) return [ meta + [data_type:'bam'], bam, bai, [] ] + if (cram) return [ meta + [data_type:'cram'], cram, crai, [] ] } - BAM_CREATE_SOM_PON_GATK(ch_mutect2_input.map{ meta, reads, index -> [ meta, reads, index, [] ] }, + 
BAM_CREATE_SOM_PON_GATK(ch_mutect2_input, ch_fasta, ch_fai, ch_dict, From d676fdb3cb1d475158ccd8677eb14ebc7df3b84e Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 16:57:04 +0200 Subject: [PATCH 101/234] fix default tests with updated config --- conf/test.config | 2 +- tests/pipeline/default.nf.test.snap | 20 ++++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/conf/test.config b/conf/test.config index 763db91..3912e07 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,7 +23,7 @@ params { input = "${projectDir}/tests/csv/1.0.0/bam.csv" // Main options - tools = 'cnvkit,mutect2' + tools = 'cnvkit' //Germlinecnvcaller options gcnv_scatter_content = 2 diff --git a/tests/pipeline/default.nf.test.snap b/tests/pipeline/default.nf.test.snap index e921a19..4d21008 100644 --- a/tests/pipeline/default.nf.test.snap +++ b/tests/pipeline/default.nf.test.snap @@ -3,16 +3,24 @@ "content": [ "{CNVKIT_BATCH={cnvkit=0.9.10}, Workflow={nf-core/createpanelrefs=v1.0dev}}" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, "timestamp": "2024-02-21T12:37:23.523857103" }, "cnvkit": { "content": [ - "panel.cnn:md5,07dea67088da689ad04012552c606882", - "test.paired_end.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", - "test.paired_end.sorted.targetcoverage.cnn:md5,ff526714696aa49bdc1dc8d00d965266", - "test2.paired_end.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", - "test2.paired_end.sorted.targetcoverage.cnn:md5,6ae6b3fce7299eedca6133d911c38fe1" + "panel.cnn:md5,68028cd2b4e0fc4489bf5bfd0a73440f", + "test.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,0067cc3a0e479b23ab3bf056cead31b4", + "test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + 
"test2.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,1e1012812eb893afd931485cb760294e" ], - "timestamp": "2023-07-08T16:47:57+0000" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, + "timestamp": "2024-04-03T16:55:32.197815" } } \ No newline at end of file From c6afd52fc77cf2a637d0546960b3eb535da2c9e3 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 17:06:36 +0200 Subject: [PATCH 102/234] add tests for mutect2_pon --- tests/pipeline/mutect2.nf.test | 26 ++++++++++++++++++++++++++ tests/pipeline/mutect2.nf.test.snap | 12 ++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 tests/pipeline/mutect2.nf.test create mode 100644 tests/pipeline/mutect2.nf.test.snap diff --git a/tests/pipeline/mutect2.nf.test b/tests/pipeline/mutect2.nf.test new file mode 100644 index 0000000..ac2e429 --- /dev/null +++ b/tests/pipeline/mutect2.nf.test @@ -0,0 +1,26 @@ +nextflow_pipeline { + + name "Test MUTECT2_PON" + script "main.nf" + tag "MUTECT2" + + test("Run MUTECT2 test") { + + when { + params { + outdir = "$outputDir" + tools = 'mutect2' + pon_name = 'test' + validationSchemaIgnoreParams = 'genomes,baseDir,base-dir,outputDir,output-dir' + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/multiqc/").exists() }, + { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") } + ) + } + } +} diff --git a/tests/pipeline/mutect2.nf.test.snap b/tests/pipeline/mutect2.nf.test.snap new file mode 100644 index 0000000..95bd5fc --- /dev/null +++ b/tests/pipeline/mutect2.nf.test.snap @@ -0,0 +1,12 @@ +{ + "software_versions": { + "content": [ + "{GATK4_CREATESOMATICPANELOFNORMALS={gatk4=4.5.0.0}, GATK4_GENOMICSDBIMPORT={gatk4=4.5.0.0}, GATK4_MUTECT2={gatk4=4.5.0.0}, Workflow={nf-core/createpanelrefs=v1.0dev}}" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, + "timestamp": "2024-04-03T17:06:08.093342" + } +} \ No newline at end of file From 
61be19b29d9aa5de983d251ec222b06b8abf372c Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 17:07:52 +0200 Subject: [PATCH 103/234] update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 8cf857e..7d797ff 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ results/ testing* testing/ work/ +.nf-test.log From 3f2124ee792badf8ff5aa77ef1b568904afb61b9 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 17:10:07 +0200 Subject: [PATCH 104/234] prettier --- modules.json | 86 ++++++++++++++-------------------------------------- 1 file changed, 22 insertions(+), 64 deletions(-) diff --git a/modules.json b/modules.json index 786d019..49a3fcc 100644 --- a/modules.json +++ b/modules.json @@ -8,121 +8,87 @@ "cnvkit/batch": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/annotateintervals": { "branch": "master", "git_sha": "42ae163c3c6eb23646189c30c07a889ad39c9b0e", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/bedtointervallist": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/collectreadcounts": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/createreadcountpanelofnormals": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/createsomaticpanelofnormals": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": [ - "bam_create_som_pon_gatk" - ] + "installed_by": ["bam_create_som_pon_gatk"] }, "gatk4/determinegermlinecontigploidy": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - 
"installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/filterintervals": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/genomicsdbimport": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "bam_create_som_pon_gatk" - ] + "installed_by": ["bam_create_som_pon_gatk"] }, "gatk4/germlinecnvcaller": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/intervallisttools": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/mutect2": { "branch": "master", "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", - "installed_by": [ - "bam_create_som_pon_gatk" - ] + "installed_by": ["bam_create_som_pon_gatk"] }, "gatk4/preprocessintervals": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "picard/createsequencedictionary": { "branch": "master", "git_sha": "20b0918591d4ba20047d7e13e5094bcceba81447", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/index": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -131,33 +97,25 @@ "bam_create_som_pon_gatk": { "branch": "master", "git_sha": "b0783b07beb65cac505fa6202e8f670437637b45", - "installed_by": [ - "subworkflows" - ] + 
"installed_by": ["subworkflows"] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} From b3c21cd71708c841d285c47c3ee1e4ca98b19460 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 3 Apr 2024 17:21:10 +0200 Subject: [PATCH 105/234] add mutect2 tests to CI --- tests/config/tags.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/config/tags.yml b/tests/config/tags.yml index 3672b33..2deb666 100644 --- a/tests/config/tags.yml +++ b/tests/config/tags.yml @@ -10,7 +10,16 @@ default: cnvkit: - conf/modules/cnvkit.config - - modules/nf-core/cnvkit/batch/main.nf + - modules/nf-core/cnvkit/batch/** + - tests/cnvkit.nf.test + +mutect2: + - conf/modules/mutect2.config + - modules/nf-core/gatk4/mutect2/** + - modules/nf-core/gatk4/genomicsdbimport/** + - modules/nf-core/gatk4/createsomaticpanelofnormals/** + - modules/nf-core/cnvkit/batch/** + - subworkflows/nf-core/bam_create_som_pon_gatk/** - tests/cnvkit.nf.test germlinecnvcaller_cohort: From 2a35f8d7c4a3f392ac26a9acc70832380edfcf2b Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Thu, 4 Apr 2024 16:39:13 +0200 Subject: [PATCH 106/234] Update nextflow.config Co-authored-by: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 243473a..5acc3bc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -142,7 +142,7 @@ 
profiles { } singularity { singularity.enabled = true - singularity.autoMoun = true + singularity.autoMounts = true conda.enabled = false docker.enabled = false podman.enabled = false From bf5a2d65e0bce96860303927f17f3c8f5baf3acf Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Thu, 4 Apr 2024 16:43:39 +0200 Subject: [PATCH 107/234] Update conf/modules/mutect2.config Co-authored-by: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> --- conf/modules/mutect2.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules/mutect2.config b/conf/modules/mutect2.config index 04900ee..9bbbb9d 100644 --- a/conf/modules/mutect2.config +++ b/conf/modules/mutect2.config @@ -12,7 +12,7 @@ process { - withName: GATK4_MUTECT2 { + withName: '.*BAM_CREATE_SOM_PON_GATK:GATK4_MUTECT2' { ext.args = "--max-mnp-distance 0" } From 16d37de26d4d08f3231d9095f6afcff94fa128cd Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 4 Apr 2024 16:41:27 +0200 Subject: [PATCH 108/234] feat: make tools a required params --- nextflow_schema.json | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index f1924de..4e61ed8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -203,6 +203,7 @@ "title": "Main options", "type": "object", "description": "Most common options used for the pipeline", + "required": ["tools"], "default": "", "properties": { "tools": { From c967164c9947bdc326eff5f3335a54247561aa31 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 4 Apr 2024 16:43:20 +0200 Subject: [PATCH 109/234] feat: pon_name is now mutect2_pon_name --- nextflow.config | 2 +- nextflow_schema.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index 5acc3bc..a837e6e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -25,7 +25,7 @@ params { tools = null // No default, must be specified // Mutect2 options - pon_name = null + mutect2_pon_name = null // 
Germlinecnvcaller options gcnv_analysis_type = 'wgs' diff --git a/nextflow_schema.json b/nextflow_schema.json index 4e61ed8..f03260a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -148,7 +148,7 @@ "description": "Options used by the mutect2 subworkflow", "default": "", "properties": { - "pon_name": { + "mutect2_pon_name": { "type": "string" } } From f05ca82ec7b3291b5324e6ffee2f1afb2b801df4 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 4 Apr 2024 16:44:47 +0200 Subject: [PATCH 110/234] fix: pon_name is mutect2_pon_name in the test too --- tests/pipeline/mutect2.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipeline/mutect2.nf.test b/tests/pipeline/mutect2.nf.test index ac2e429..767e0f9 100644 --- a/tests/pipeline/mutect2.nf.test +++ b/tests/pipeline/mutect2.nf.test @@ -10,7 +10,7 @@ nextflow_pipeline { params { outdir = "$outputDir" tools = 'mutect2' - pon_name = 'test' + mutect2_pon_name = 'test' validationSchemaIgnoreParams = 'genomes,baseDir,base-dir,outputDir,output-dir' } } From 2a84359f13eb9ac4d474a0b8c656deda39d7e965 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 4 Apr 2024 17:11:35 +0200 Subject: [PATCH 111/234] fix: fix usage of mutect2_pon_name params usage --- workflows/createpanelrefs.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index a5fb940..ac8b2be 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -119,7 +119,7 @@ workflow CREATEPANELREFS { ch_fasta, ch_fai, ch_dict, - params.pon_name, + params.mutect2_pon_name, ch_target_bed.map{ meta, bed -> [ bed ] }) ch_versions = ch_versions.mix(BAM_CREATE_SOM_PON_GATK.out.versions) From e5d34f9e6eda54b0f52f155f296ea3ce1078f397 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 5 Apr 2024 14:49:36 +0200 Subject: [PATCH 112/234] refactor --- 
conf/modules/germlinecnvcaller_cohort.config | 6 +- conf/test.config | 2 +- docs/usage.md | 1 - main.nf | 41 ++++++++++---- nextflow.config | 7 +-- nextflow_schema.json | 36 ++++++------ subworkflows/local/gens_pon.nf | 7 ++- .../local/germlinecnvcaller_cohort.nf | 15 ++--- tests/config/tags.yml | 20 ++++++- tests/pipeline/gens_pon.nf.test | 3 +- .../pipeline/germlinecnvcaller_cohort.nf.test | 12 ++-- workflows/createpanelrefs.nf | 55 ++++++++++--------- 12 files changed, 123 insertions(+), 82 deletions(-) diff --git a/conf/modules/germlinecnvcaller_cohort.config b/conf/modules/germlinecnvcaller_cohort.config index cf85887..f179d7d 100644 --- a/conf/modules/germlinecnvcaller_cohort.config +++ b/conf/modules/germlinecnvcaller_cohort.config @@ -37,11 +37,11 @@ process { } withName: '.*GERMLINECNVCALLER_COHORT:GATK4_BEDTOINTERVALLIST_TARGETS' { - ext.when = { params.analysis_type.equals("wes") && params.target_interval_list.equals(null) && params.target_bed } + ext.when = { params.gcnv_analysis_type.equals("wes") && params.gcnv_target_interval_list.equals(null) && params.gcnv_target_bed } } withName: '.*GERMLINECNVCALLER_COHORT:GATK4_BEDTOINTERVALLIST_EXCLUDE' { - ext.when = { params.analysis_type.equals("wes") && params.exclude_interval_list.equals(null) && params.exclude_bed } + ext.when = { params.gcnv_analysis_type.equals("wes") && params.gcnv_exclude_interval_list.equals(null) && params.gcnv_exclude_bed } } withName: '.*GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS' { @@ -52,7 +52,7 @@ process { } withName: '.*GERMLINECNVCALLER_COHORT:GATK4_COLLECTREADCOUNTS' { - ext.args = {"--format ${params.readcount_format} --imr OVERLAPPING_ONLY"} + ext.args = {"--format ${params.gcnv_readcount_format} --imr OVERLAPPING_ONLY"} publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/germlinecnvcaller/readcounts" }, diff --git a/conf/test.config b/conf/test.config index 3912e07..9c5822d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -35,5 +35,5 @@ 
params { fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" dict = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.dict" fai = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" - target_bed = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + mutect2_target_bed = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" } diff --git a/docs/usage.md b/docs/usage.md index e84c726..72f139d 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -151,7 +151,6 @@ If you are running the pipeline to generate references for the GATK's germlinecn | | gcnv_padding | | | gcnv_readcount_format | | | gcnv_scatter_content | -| | gcnv_segmental_duplications | 1 To learn more about this file, see [this comment](https://gatk.broadinstitute.org/hc/en-us/community/posts/360074399831/comments/13441240230299) on GATK forum.
diff --git a/main.nf b/main.nf index 2dfd69b..26bc76b 100644 --- a/main.nf +++ b/main.nf @@ -11,17 +11,6 @@ nextflow.enable.dsl = 2 -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -include { CREATEPANELREFS } from './workflows/createpanelrefs' -include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline' -include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline' - -include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline' - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GENOME PARAMETER VALUES @@ -39,7 +28,16 @@ params.gcnv_mappable_regions = getGenomeAttribute('gcnv_mappable_regions') params.gcnv_target_bed = getGenomeAttribute('gcnv_target_bed') params.gcnv_target_interval_list = getGenomeAttribute('gcnv_target_interval_list') params.gcnv_ploidy_priors = getGenomeAttribute('gcnv_ploidy_priors') -params.gcnv_segmental_duplications = getGenomeAttribute('gcnv_segmental_duplications') +params.mutect2_target_bed = getGenomeAttribute('mutect2_target_bed') + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +include { CREATEPANELREFS } from './workflows/createpanelrefs' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -112,6 +110,25 @@ workflow { ) } +/* 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + DEFINE FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Get attribute from genome config file e.g. fasta +// + +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END diff --git a/nextflow.config b/nextflow.config index a837e6e..5fa9304 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,10 +16,6 @@ params { genome = null igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false - fasta = null - fai = null - dict = null - target_bed = null // Building Panel of Normals and models tools = null // No default, must be specified @@ -30,13 +26,14 @@ params { // Germlinecnvcaller options gcnv_analysis_type = 'wgs' gcnv_bin_length = 1000 + gcnv_pon_name = 'germlinecnvcaller' gcnv_padding = 0 gcnv_readcount_format = 'HDF5' gcnv_scatter_content = 5000 - gcnv_segmental_duplications = null // Gens options gens_bin_length = 100 + gens_pon_name = 'gens' gens_readcount_format = 'HDF5' // CNVkit options diff --git a/nextflow_schema.json b/nextflow_schema.json index f03260a..2bef177 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -58,6 +58,11 @@ "fa_icon": "fas fa-sort-numeric-down", "help_text": "Used by GATK's PreprocessIntervals. GATK recommends a padding of 0 for WGS analysis, and 250 for WES analysis." 
}, + "gcnv_pon_name": { + "type": "string", + "description": "Name for panel of normals.", + "default": "germlinecnvcaller" + }, "gcnv_ploidy_priors": { "type": "string", "exists": true, @@ -81,14 +86,6 @@ "fa_icon": "fas fa-sort-numeric-down", "help_text": "Used by GATK/Picards's IntervalListTools." }, - "gcnv_segmental_duplications": { - "type": "string", - "exists": true, - "description": "Path to segmental-duplication track in .bed or .bed.gz format. Overlapping intervals must be merged.", - "format": "file-path", - "fa_icon": "fas fa-file", - "help_text": "Used by GATK's AnnotateIntervals." - }, "gcnv_target_bed": { "type": "string", "exists": true, @@ -122,6 +119,11 @@ "fa_icon": "fas fa-sort-numeric-down", "help_text": "Used by GATK's PreprocessIntervals. We recommend a bin length of 100." }, + "gens_pon_name": { + "type": "string", + "description": "Name for panel of normals.", + "default": "gens" + }, "gens_readcount_format": { "type": "string", "description": "Output file format for count data", @@ -150,6 +152,15 @@ "properties": { "mutect2_pon_name": { "type": "string" + }, + "mutect2_target_bed": { + "type": "string", + "description": "Path to target bed file", + "pattern": "^\\S+\\.bed$", + "format": "file-path", + "fa_icon": "fas fa-file", + "exists": true, + "mimetype": "text/plain" } } }, @@ -262,15 +273,6 @@ "fa_icon": "fas fa-file", "exists": true, "mimetype": "text/plain" - }, - "target_bed": { - "type": "string", - "description": "Path to target bed file", - "pattern": "^\\S+\\.bed$", - "format": "file-path", - "fa_icon": "fas fa-file", - "exists": true, - "mimetype": "text/plain" } } }, diff --git a/subworkflows/local/gens_pon.nf b/subworkflows/local/gens_pon.nf index 38c9727..36cea2a 100644 --- a/subworkflows/local/gens_pon.nf +++ b/subworkflows/local/gens_pon.nf @@ -7,10 +7,11 @@ include { SAMTOOLS_INDEX } from '../../modules/nf-core/samt workflow GENS_PON { take: - ch_user_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_user_fai 
// channel: [mandatory] [ val(meta), path(fai) ] + ch_user_dict // channel: [optional] [ val(meta), path(dict) ] + ch_user_fai // channel: [optional] [ val(meta), path(fai) ] ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] + val_pon_name // string: [optional] name for panel of normals main: ch_versions = Channel.empty() @@ -70,7 +71,7 @@ workflow GENS_PON { .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) .collect { it[1] } .map { it -> - return [[id:"gens_pon"], it] + return [[id:val_pon_name], it] } .set { ch_readcounts_out } diff --git a/subworkflows/local/germlinecnvcaller_cohort.nf b/subworkflows/local/germlinecnvcaller_cohort.nf index 77303b6..ea10529 100644 --- a/subworkflows/local/germlinecnvcaller_cohort.nf +++ b/subworkflows/local/germlinecnvcaller_cohort.nf @@ -13,15 +13,16 @@ include { SAMTOOLS_INDEX } from '../ workflow GERMLINECNVCALLER_COHORT { take: - ch_user_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_user_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_user_dict // channel: [optional] [ val(meta), path(dict) ] + ch_user_fai // channel: [optional] [ val(meta), path(fai) ] ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] - ch_target_bed // channel: [mandatory] [ val(meta), path(bed) ] - ch_user_target_interval_list // channel: [mandatory] [ val(meta), path(intervals) ] - ch_exclude_bed // channel: [mandatory] [ val(meta), path(bed) ] - ch_user_exclude_interval_list // channel: [mandatory] [ val(meta), path(intervals) ] + ch_target_bed // channel: [optional] [ val(meta), path(bed) ] + ch_user_target_interval_list // channel: [optional] [ val(meta), path(intervals) ] + ch_exclude_bed // channel: [optional] [ val(meta), path(bed) ] + ch_user_exclude_interval_list // channel: [optional] [ val(meta), 
path(intervals) ] + val_pon_name // string: [optional] name for panel of normals main: ch_versions = Channel.empty() @@ -124,7 +125,7 @@ workflow GERMLINECNVCALLER_COHORT { GATK4_COLLECTREADCOUNTS.out.tsv .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) .collect { it[1] } - .map {tsv -> [[id:'cohort'],tsv]} + .map {tsv -> [[id:val_pon_name],tsv]} .set { ch_readcounts_out } diff --git a/tests/config/tags.yml b/tests/config/tags.yml index 2deb666..ccae2a9 100644 --- a/tests/config/tags.yml +++ b/tests/config/tags.yml @@ -18,12 +18,22 @@ mutect2: - modules/nf-core/gatk4/mutect2/** - modules/nf-core/gatk4/genomicsdbimport/** - modules/nf-core/gatk4/createsomaticpanelofnormals/** - - modules/nf-core/cnvkit/batch/** - subworkflows/nf-core/bam_create_som_pon_gatk/** - - tests/cnvkit.nf.test germlinecnvcaller_cohort: - conf/modules/germlinecnvcaller_cohort.config + - modules/nf-core/gatk4/annotateintervals/** + - modules/nf-core/gatk4/bedtointervallist/** + - modules/nf-core/gatk4/bedtointervallist/** + - modules/nf-core/gatk4/collectreadcounts/** + - modules/nf-core/gatk4/determinegermlinecontigploidy/** + - modules/nf-core/gatk4/filterintervals/** + - modules/nf-core/gatk4/germlinecnvcaller/** + - modules/nf-core/gatk4/intervallisttools/** + - modules/nf-core/gatk4/preprocessintervals/** + - modules/nf-core/picard/createsequencedictionary/** + - modules/nf-core/samtools/faidx/** + - modules/nf-core/samtools/index/** - subworkflows/local/germlinecnvcaller_cohort.nf - tests/pipeline/germlinecnvcaller_cohort.nf.test - tests/pipeline/germlinecnvcaller_cohort.nf.config @@ -31,6 +41,12 @@ germlinecnvcaller_cohort: gens_pon: - conf/modules/gens_pon.config + - modules/nf-core/gatk4/collectreadcounts/** + - modules/nf-core/gatk4/createreadcountpanelofnormals/** + - modules/nf-core/gatk4/preprocessintervals/** + - modules/nf-core/picard/createsequencedictionary/** + - modules/nf-core/samtools/faidx/** + - modules/nf-core/samtools/index/** - subworkflows/local/gens_pon.nf - 
tests/pipeline/gens_pon.nf.test - tests/test_assets/gens_software_versions.yaml diff --git a/tests/pipeline/gens_pon.nf.test b/tests/pipeline/gens_pon.nf.test index b4ff563..7f712c7 100644 --- a/tests/pipeline/gens_pon.nf.test +++ b/tests/pipeline/gens_pon.nf.test @@ -15,7 +15,7 @@ nextflow_workflow { gens_bin_length = 100 tools = 'gens' fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" - + gens_pon_name = 'gens_pon' } workflow { """ @@ -25,6 +25,7 @@ nextflow_workflow { input[3] = Channel.of( [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"),[]], [[ id:'test2' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam"),[]]) + input[4] = params.gens_pon_name """ } } diff --git a/tests/pipeline/germlinecnvcaller_cohort.nf.test b/tests/pipeline/germlinecnvcaller_cohort.nf.test index db2442c..2720a8f 100644 --- a/tests/pipeline/germlinecnvcaller_cohort.nf.test +++ b/tests/pipeline/germlinecnvcaller_cohort.nf.test @@ -10,11 +10,12 @@ nextflow_workflow { when { params { - outdir = "$outputDir" - tools = 'germlinecnvcaller' + outdir = "$outputDir" + tools = 'germlinecnvcaller' + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" gcnv_scatter_content = 2 - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" - ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" + gcnv_pon_name = 'cohort' + gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" } workflow { """ @@ -24,11 +25,12 @@ 
nextflow_workflow { input[3] = Channel.of( [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"),[]], [[ id:'test2' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam"),[]]) - input[4] = Channel.fromPath(params.ploidy_priors) + input[4] = Channel.fromPath(params.gcnv_ploidy_priors) input[5] = Channel.value([[:],[]]) input[6] = Channel.value([[:],[]]) input[7] = Channel.value([[:],[]]) input[8] = Channel.value([[:],[]]) + input[9] = params.gcnv_pon_name """ } } diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index ac8b2be..f4a2a94 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -29,24 +29,26 @@ include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch/ma include { MULTIQC } from '../modules/nf-core/multiqc/main' // Initialize file channels based on params, defined in the params.genomes[params.genome] scope -ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName], targets]}.collect() - : Channel.value([[id:'null'], []]) -ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName], dict]}.collect() - : Channel.empty() -ch_exclude_bed = params.exclude_bed ? Channel.fromPath(params.exclude_bed).map { exclude -> [[id:exclude.baseName], exclude]}.collect() - : Channel.value([[id:'null'], []]) -ch_exclude_interval_list = params.exclude_interval_list ? Channel.fromPath(params.exclude_interval_list).map { exclude -> [[id:exclude.baseName], exclude]}.collect() - : Channel.value([[id:'null'], []]) -ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName], fai]}.collect() - : Channel.empty() -ch_fasta = params.fasta ? 
Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName], fasta]}.collect() - : Channel.empty() -ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() - : Channel.empty() -ch_target_bed = params.target_bed ? Channel.fromPath(params.target_bed).map { targets -> [[id:targets.baseName], targets]}.collect() - : Channel.value([[id:'null'], []]) -ch_target_interval_list = params.target_interval_list ? Channel.fromPath(params.target_interval_list).map { targets -> [[id:targets.baseName], targets]}.collect() - : Channel.value([[id:'null'], []]) +ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName], targets]}.collect() + : Channel.value([[id:'null'], []]) +ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName], dict]}.collect() + : Channel.empty() +ch_gcnv_exclude_bed = params.gcnv_exclude_bed ? Channel.fromPath(params.gcnv_exclude_bed).map { exclude -> [[id:exclude.baseName], exclude]}.collect() + : Channel.value([[id:'null'], []]) +ch_gcnv_exclude_interval_list = params.gcnv_exclude_interval_list ? Channel.fromPath(params.gcnv_exclude_interval_list).map { exclude -> [[id:exclude.baseName], exclude]}.collect() + : Channel.value([[id:'null'], []]) +ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName], fai]}.collect() + : Channel.empty() +ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName], fasta]}.collect() + : Channel.empty() +ch_gcnv_ploidy_priors = params.gcnv_ploidy_priors ? Channel.fromPath(params.gcnv_ploidy_priors).collect() + : Channel.empty() +ch_gcnv_target_bed = params.gcnv_target_bed ? Channel.fromPath(params.gcnv_target_bed).map { targets -> [[id:targets.baseName], targets]}.collect() + : Channel.value([[id:'null'], []]) +ch_gcnv_target_interval_list = params.gcnv_target_interval_list ? 
Channel.fromPath(params.gcnv_target_interval_list).map { targets -> [[id:targets.baseName], targets]}.collect() + : Channel.value([[id:'null'], []]) +ch_mutect2_target_bed = params.mutect2_target_bed ? Channel.fromPath(params.mutect2_target_bed).collect() + : Channel.value([]) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -99,11 +101,12 @@ workflow CREATEPANELREFS { ch_fai, ch_fasta, ch_germlinecnvcaller_input, - ch_ploidy_priors, - ch_target_bed, - ch_target_interval_list, - ch_exclude_bed, - ch_exclude_interval_list ) + ch_gcnv_ploidy_priors, + ch_gcnv_target_bed, + ch_gcnv_target_interval_list, + ch_gcnv_exclude_bed, + ch_gcnv_exclude_interval_list, + params.gcnv_pon_name ) ch_versions = ch_versions.mix(GERMLINECNVCALLER_COHORT.out.versions) } @@ -120,7 +123,7 @@ workflow CREATEPANELREFS { ch_fai, ch_dict, params.mutect2_pon_name, - ch_target_bed.map{ meta, bed -> [ bed ] }) + ch_mutect2_target_bed) ch_versions = ch_versions.mix(BAM_CREATE_SOM_PON_GATK.out.versions) @@ -138,7 +141,9 @@ workflow CREATEPANELREFS { GENS_PON(ch_dict, ch_fai, ch_fasta, - ch_gens_input) + ch_gens_input, + params.gens_pon_name ) + ch_versions = ch_versions.mix(GENS_PON.out.versions) } From 3e42ba5cd2987bee0f52843d615b525f7fb78b23 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 5 Apr 2024 15:57:56 +0200 Subject: [PATCH 113/234] update test --- tests/pipeline/gens_pon.nf.test | 2 ++ tests/pipeline/germlinecnvcaller_cohort.nf.test | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/pipeline/gens_pon.nf.test b/tests/pipeline/gens_pon.nf.test index 7f712c7..03276be 100644 --- a/tests/pipeline/gens_pon.nf.test +++ b/tests/pipeline/gens_pon.nf.test @@ -15,6 +15,8 @@ nextflow_workflow { gens_bin_length = 100 tools = 'gens' fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" + fai = null + dict = null 
gens_pon_name = 'gens_pon' } workflow { diff --git a/tests/pipeline/germlinecnvcaller_cohort.nf.test b/tests/pipeline/germlinecnvcaller_cohort.nf.test index 2720a8f..5b451a2 100644 --- a/tests/pipeline/germlinecnvcaller_cohort.nf.test +++ b/tests/pipeline/germlinecnvcaller_cohort.nf.test @@ -13,6 +13,8 @@ nextflow_workflow { outdir = "$outputDir" tools = 'germlinecnvcaller' fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" + fai = null + dict = null gcnv_scatter_content = 2 gcnv_pon_name = 'cohort' gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" From 9dca5b12fd2ea7086a49a5af3c13be093d5d8341 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 6 Apr 2024 15:15:47 +0200 Subject: [PATCH 114/234] hopefully, final commit --- CHANGELOG.md | 8 +++ README.md | 4 +- conf/modules/germlinecnvcaller_cohort.config | 8 +++ docs/usage.md | 3 + modules.json | 5 ++ .../gatk4/indexfeaturefile/environment.yml | 7 +++ .../nf-core/gatk4/indexfeaturefile/main.nf | 41 +++++++++++++ .../nf-core/gatk4/indexfeaturefile/meta.yml | 43 ++++++++++++++ nextflow.config | 2 +- nextflow_schema.json | 10 +++- .../local/germlinecnvcaller_cohort.nf | 59 +++++++++++-------- tests/config/tags.yml | 1 + .../pipeline/germlinecnvcaller_cohort.nf.test | 24 ++++---- workflows/createpanelrefs.nf | 35 +++++++---- 14 files changed, 200 insertions(+), 50 deletions(-) create mode 100644 modules/nf-core/gatk4/indexfeaturefile/environment.yml create mode 100644 modules/nf-core/gatk4/indexfeaturefile/main.nf create mode 100644 modules/nf-core/gatk4/indexfeaturefile/meta.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 446d7b0..bacac6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,14 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - 
[#5](https://github.com/nf-core/createpanelrefs/pull/5) - Usage of nf-validation - [#5](https://github.com/nf-core/createpanelrefs/pull/5) - Usage of nf-test - [#8](https://github.com/nf-core/createpanelrefs/pull/8) - `Mutect2` can be used to create a PON +- [#10](https://github.com/nf-core/createpanelrefs/pull/10) - `GATK germlinecnvcaller` can be used to create a PON +- [#17](https://github.com/nf-core/createpanelrefs/pull/17) - `GENS` can be used to create a PON + +### `Updated` + +- [#19](https://github.com/nf-core/createpanelrefs/pull/19) - Updates germlinecnvcaller subworkflow to handle exome samples +- [#24](https://github.com/nf-core/createpanelrefs/pull/24) - Updates germlinecnvcaller subworkflow to use mappability and segmental duplications track +- [#24](https://github.com/nf-core/createpanelrefs/pull/24) - Updates germlinecnvcaller and gens subworkflows to use custom names for panel of normals. ### `Fixed` diff --git a/README.md b/README.md index 549e553..d35c1fa 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,9 @@ 1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) 2. Build Panel of Normals for [`CNVKIT`](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004873) 3. Build ploidy and cnv calling models for [`GATK's germlinecnvcaller workflow`](https://genome.cshlp.org/content/20/9/1297) -4. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +4. Build Panel of Normals for [`GENS`](https://github.com/Clinical-Genomics-Lund/gens) +5. Build Panel of Normals for [`Mutect2`](https://genome.cshlp.org/content/20/9/1297) +6. 
Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) ## Usage diff --git a/conf/modules/germlinecnvcaller_cohort.config b/conf/modules/germlinecnvcaller_cohort.config index f179d7d..d4056f9 100644 --- a/conf/modules/germlinecnvcaller_cohort.config +++ b/conf/modules/germlinecnvcaller_cohort.config @@ -36,6 +36,14 @@ process { ] } + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_INDEXFEATUREFILE_MAPPABILITY' { + ext.when = { !params.gcnv_mappable_regions.equals(null) } + } + + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_INDEXFEATUREFILE_SEGDUP' { + ext.when = { !params.gcnv_segmental_duplications.equals(null) } + } + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_BEDTOINTERVALLIST_TARGETS' { ext.when = { params.gcnv_analysis_type.equals("wes") && params.gcnv_target_interval_list.equals(null) && params.gcnv_target_bed } } diff --git a/docs/usage.md b/docs/usage.md index 72f139d..7f57c47 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -134,6 +134,7 @@ If you are running the pipeline to generate references for the gens workflow, yo | fasta/genomes | fai | | | dict | | | gens_bin_length | +| | gens_pon_name | | | gens_readcount_format | ### germlinecnvcaller @@ -149,8 +150,10 @@ If you are running the pipeline to generate references for the GATK's germlinecn | | gcnv_bin_length | | | gcnv_mappable_regions | | | gcnv_padding | +| | gcnv_model_name | | | gcnv_readcount_format | | | gcnv_scatter_content | +| | gcnv_segmental_duplications | 1 To learn more about this file, see [this comment](https://gatk.broadinstitute.org/hc/en-us/community/posts/360074399831/comments/13441240230299) on GATK forum.
diff --git a/modules.json b/modules.json index 49a3fcc..93087ea 100644 --- a/modules.json +++ b/modules.json @@ -55,6 +55,11 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "gatk4/indexfeaturefile": { + "branch": "master", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", + "installed_by": ["modules"] + }, "gatk4/intervallisttools": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", diff --git a/modules/nf-core/gatk4/indexfeaturefile/environment.yml b/modules/nf-core/gatk4/indexfeaturefile/environment.yml new file mode 100644 index 0000000..dccdb15 --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_indexfeaturefile +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/indexfeaturefile/main.nf b/modules/nf-core/gatk4/indexfeaturefile/main.nf new file mode 100644 index 0000000..b1a2698 --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/main.nf @@ -0,0 +1,41 @@ +process GATK4_INDEXFEATUREFILE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(feature_file) + + output: + tuple val(meta), path("*.{tbi,idx}"), emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK IndexFeatureFile] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + IndexFeatureFile \\ + --input $feature_file \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/indexfeaturefile/meta.yml b/modules/nf-core/gatk4/indexfeaturefile/meta.yml new file mode 100644 index 0000000..575c603 --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/meta.yml @@ -0,0 +1,43 @@ +name: gatk4_indexfeaturefile +description: Creates an index for a feature file, e.g. VCF or BED file. +keywords: + - feature + - gatk4 + - index + - indexfeaturefile +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4) + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + tool_dev_url: https://github.com/broadinstitute/gatk + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["BSD-3-clause"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - feature_file: + type: file + description: VCF/BED file + pattern: "*.{vcf,vcf.gz,bed,bed.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: Index for VCF/BED file + pattern: "*.{tbi,idx}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@santiagorevale" +maintainers: + - "@santiagorevale" diff --git a/nextflow.config b/nextflow.config index 5fa9304..337999d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -26,7 +26,7 @@ params { // Germlinecnvcaller options gcnv_analysis_type = 'wgs' gcnv_bin_length = 1000 - gcnv_pon_name = 'germlinecnvcaller' + gcnv_model_name = 'germlinecnvcaller' gcnv_padding = 0 gcnv_readcount_format = 'HDF5' gcnv_scatter_content = 5000 diff --git a/nextflow_schema.json b/nextflow_schema.json index 2bef177..8ca60ca 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -58,7 +58,7 @@ "fa_icon": "fas fa-sort-numeric-down", "help_text": "Used by GATK's PreprocessIntervals. GATK recommends a padding of 0 for WGS analysis, and 250 for WES analysis." }, - "gcnv_pon_name": { + "gcnv_model_name": { "type": "string", "description": "Name for panel of normals.", "default": "germlinecnvcaller" @@ -86,6 +86,14 @@ "fa_icon": "fas fa-sort-numeric-down", "help_text": "Used by GATK/Picards's IntervalListTools." }, + "gcnv_segmental_duplications": { + "type": "string", + "exists": true, + "description": "Path to segmental-duplication track in .bed or .bed.gz format. Overlapping intervals must be merged.", + "format": "file-path", + "fa_icon": "fas fa-file", + "help_text": "Used by GATK's AnnotateIntervals." 
+ }, "gcnv_target_bed": { "type": "string", "exists": true, diff --git a/subworkflows/local/germlinecnvcaller_cohort.nf b/subworkflows/local/germlinecnvcaller_cohort.nf index ea10529..21d82ab 100644 --- a/subworkflows/local/germlinecnvcaller_cohort.nf +++ b/subworkflows/local/germlinecnvcaller_cohort.nf @@ -1,28 +1,32 @@ -include { GATK4_ANNOTATEINTERVALS } from '../../modules/nf-core/gatk4/annotateintervals/main' -include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_TARGETS } from '../../modules/nf-core/gatk4/bedtointervallist/main' -include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_EXCLUDE } from '../../modules/nf-core/gatk4/bedtointervallist/main' -include { GATK4_COLLECTREADCOUNTS } from '../../modules/nf-core/gatk4/collectreadcounts/main' -include { GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../modules/nf-core/gatk4/determinegermlinecontigploidy/main' -include { GATK4_FILTERINTERVALS } from '../../modules/nf-core/gatk4/filterintervals/main' -include { GATK4_GERMLINECNVCALLER } from '../../modules/nf-core/gatk4/germlinecnvcaller/main' -include { GATK4_INTERVALLISTTOOLS } from '../../modules/nf-core/gatk4/intervallisttools/main' -include { GATK4_PREPROCESSINTERVALS } from '../../modules/nf-core/gatk4/preprocessintervals/main' -include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main' -include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { GATK4_ANNOTATEINTERVALS } from '../../modules/nf-core/gatk4/annotateintervals/main' +include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_TARGETS } from '../../modules/nf-core/gatk4/bedtointervallist/main' +include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_EXCLUDE } from '../../modules/nf-core/gatk4/bedtointervallist/main' +include { GATK4_COLLECTREADCOUNTS } from '../../modules/nf-core/gatk4/collectreadcounts/main' +include { 
GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../modules/nf-core/gatk4/determinegermlinecontigploidy/main' +include { GATK4_FILTERINTERVALS } from '../../modules/nf-core/gatk4/filterintervals/main' +include { GATK4_GERMLINECNVCALLER } from '../../modules/nf-core/gatk4/germlinecnvcaller/main' +include { GATK4_INDEXFEATUREFILE as GATK4_INDEXFEATUREFILE_MAPPABILITY } from '../../modules/nf-core/gatk4/indexfeaturefile/main' +include { GATK4_INDEXFEATUREFILE as GATK4_INDEXFEATUREFILE_SEGDUP } from '../../modules/nf-core/gatk4/indexfeaturefile/main' +include { GATK4_INTERVALLISTTOOLS } from '../../modules/nf-core/gatk4/intervallisttools/main' +include { GATK4_PREPROCESSINTERVALS } from '../../modules/nf-core/gatk4/preprocessintervals/main' +include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main' +include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' workflow GERMLINECNVCALLER_COHORT { take: - ch_user_dict // channel: [optional] [ val(meta), path(dict) ] - ch_user_fai // channel: [optional] [ val(meta), path(fai) ] - ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] - ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] - ch_target_bed // channel: [optional] [ val(meta), path(bed) ] - ch_user_target_interval_list // channel: [optional] [ val(meta), path(intervals) ] - ch_exclude_bed // channel: [optional] [ val(meta), path(bed) ] - ch_user_exclude_interval_list // channel: [optional] [ val(meta), path(intervals) ] - val_pon_name // string: [optional] name for panel of normals + ch_user_dict // channel: [optional] [ val(meta), path(dict) ] + ch_user_fai // channel: [optional] [ val(meta), path(fai) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_input // channel: [mandatory] [ val(meta), path(bam/cram), 
path(bai/crai) ] + ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] + ch_mappable_regions // channel: [optional] [ val(meta), path(bed) ] + ch_segmental_duplications // channel: [optional] [ val(meta), path(bed) ] + ch_target_bed // channel: [optional] [ val(meta), path(bed) ] + ch_user_target_interval_list // channel: [optional] [ val(meta), path(intervals) ] + ch_exclude_bed // channel: [optional] [ val(meta), path(bed) ] + ch_user_exclude_interval_list // channel: [optional] [ val(meta), path(intervals) ] + val_pon_name // string: [optional] name for panel of normals main: ch_versions = Channel.empty() @@ -34,6 +38,10 @@ workflow GERMLINECNVCALLER_COHORT { PICARD_CREATESEQUENCEDICTIONARY ( ch_fasta ) + GATK4_INDEXFEATUREFILE_MAPPABILITY ( ch_mappable_regions ) + + GATK4_INDEXFEATUREFILE_SEGDUP ( ch_segmental_duplications ) + ch_user_dict .mix(PICARD_CREATESEQUENCEDICTIONARY.out.reference_dict) .collect() @@ -85,7 +93,10 @@ workflow GERMLINECNVCALLER_COHORT { ch_fasta, ch_fai, ch_dict, - [[:],[]], [[:],[]], [[:],[]], [[:],[]]) + ch_mappable_regions, + GATK4_INDEXFEATUREFILE_MAPPABILITY.out.index.ifEmpty([[:],[]]), + ch_segmental_duplications, + GATK4_INDEXFEATUREFILE_SEGDUP.out.index.ifEmpty([[:],[]])) // // Filter out files that lack indices, and generate them @@ -165,6 +176,8 @@ workflow GERMLINECNVCALLER_COHORT { ch_versions = ch_versions.mix(GATK4_COLLECTREADCOUNTS.out.versions.first()) ch_versions = ch_versions.mix(GATK4_ANNOTATEINTERVALS.out.versions) ch_versions = ch_versions.mix(GATK4_FILTERINTERVALS.out.versions) + ch_versions = ch_versions.mix(GATK4_INDEXFEATUREFILE_MAPPABILITY.out.versions) + ch_versions = ch_versions.mix(GATK4_INDEXFEATUREFILE_SEGDUP.out.versions) ch_versions = ch_versions.mix(GATK4_INTERVALLISTTOOLS.out.versions) ch_versions = ch_versions.mix(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.versions) ch_versions = ch_versions.mix(GATK4_GERMLINECNVCALLER.out.versions.first()) diff --git a/tests/config/tags.yml b/tests/config/tags.yml 
index ccae2a9..0bb478d 100644 --- a/tests/config/tags.yml +++ b/tests/config/tags.yml @@ -29,6 +29,7 @@ germlinecnvcaller_cohort: - modules/nf-core/gatk4/determinegermlinecontigploidy/** - modules/nf-core/gatk4/filterintervals/** - modules/nf-core/gatk4/germlinecnvcaller/** + - modules/nf-core/gatk4/indexfeaturefile/** - modules/nf-core/gatk4/intervallisttools/** - modules/nf-core/gatk4/preprocessintervals/** - modules/nf-core/picard/createsequencedictionary/** diff --git a/tests/pipeline/germlinecnvcaller_cohort.nf.test b/tests/pipeline/germlinecnvcaller_cohort.nf.test index 5b451a2..0dedd87 100644 --- a/tests/pipeline/germlinecnvcaller_cohort.nf.test +++ b/tests/pipeline/germlinecnvcaller_cohort.nf.test @@ -16,23 +16,25 @@ nextflow_workflow { fai = null dict = null gcnv_scatter_content = 2 - gcnv_pon_name = 'cohort' + gcnv_model_name = 'cohort' gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" } workflow { """ - input[0] = Channel.empty() - input[1] = Channel.empty() - input[2] = Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() - input[3] = Channel.of( + input[0] = Channel.empty() + input[1] = Channel.empty() + input[2] = Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() + input[3] = Channel.of( [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"),[]], [[ id:'test2' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam"),[]]) - input[4] = Channel.fromPath(params.gcnv_ploidy_priors) - input[5] = Channel.value([[:],[]]) - input[6] = Channel.value([[:],[]]) - input[7] = Channel.value([[:],[]]) - input[8] = Channel.value([[:],[]]) - input[9] = params.gcnv_pon_name + input[4] = 
Channel.fromPath(params.gcnv_ploidy_priors) + input[5] = Channel.value([[:],[]]) + input[6] = Channel.value([[:],[]]) + input[7] = Channel.value([[:],[]]) + input[8] = Channel.value([[:],[]]) + input[9] = Channel.value([[:],[]]) + input[10] = Channel.value([[:],[]]) + input[11] = params.gcnv_model_name """ } } diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index f4a2a94..5ab0497 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -29,24 +29,31 @@ include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch/ma include { MULTIQC } from '../modules/nf-core/multiqc/main' // Initialize file channels based on params, defined in the params.genomes[params.genome] scope -ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName], targets]}.collect() - : Channel.value([[id:'null'], []]) -ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName], dict]}.collect() +ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName], dict]}.collect() : Channel.empty() -ch_gcnv_exclude_bed = params.gcnv_exclude_bed ? Channel.fromPath(params.gcnv_exclude_bed).map { exclude -> [[id:exclude.baseName], exclude]}.collect() - : Channel.value([[id:'null'], []]) -ch_gcnv_exclude_interval_list = params.gcnv_exclude_interval_list ? Channel.fromPath(params.gcnv_exclude_interval_list).map { exclude -> [[id:exclude.baseName], exclude]}.collect() - : Channel.value([[id:'null'], []]) -ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName], fai]}.collect() +ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName], fai]}.collect() : Channel.empty() -ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName], fasta]}.collect() +ch_fasta = params.fasta ? 
Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName], fasta]}.collect() : Channel.empty() -ch_gcnv_ploidy_priors = params.gcnv_ploidy_priors ? Channel.fromPath(params.gcnv_ploidy_priors).collect() +// Initialize cnvkit specific parameters +ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName], targets]}.collect() + : Channel.value([[id:'null'], []]) +// Initialize germlinecnvcaller specific parameters +ch_gcnv_exclude_bed = params.gcnv_exclude_bed ? Channel.fromPath(params.gcnv_exclude_bed).map { exclude -> [[id:exclude.baseName], exclude]}.collect() + : Channel.value([[id:'null'], []]) +ch_gcnv_exclude_interval_list = params.gcnv_exclude_interval_list ? Channel.fromPath(params.gcnv_exclude_interval_list).map { exclude -> [[id:exclude.baseName], exclude]}.collect() + : Channel.value([[id:'null'], []]) +ch_gcnv_mappable_regions = params.gcnv_mappable_regions ? Channel.fromPath(params.gcnv_mappable_regions).collect() + : Channel.value([[id:'null'], []]) +ch_gcnv_ploidy_priors = params.gcnv_ploidy_priors ? Channel.fromPath(params.gcnv_ploidy_priors).collect() : Channel.empty() -ch_gcnv_target_bed = params.gcnv_target_bed ? Channel.fromPath(params.gcnv_target_bed).map { targets -> [[id:targets.baseName], targets]}.collect() +ch_gcnv_target_bed = params.gcnv_target_bed ? Channel.fromPath(params.gcnv_target_bed).map { targets -> [[id:targets.baseName], targets]}.collect() + : Channel.value([[id:'null'], []]) +ch_gcnv_target_interval_list = params.gcnv_target_interval_list ? Channel.fromPath(params.gcnv_target_interval_list).map { targets -> [[id:targets.baseName], targets]}.collect() : Channel.value([[id:'null'], []]) -ch_gcnv_target_interval_list = params.gcnv_target_interval_list ? Channel.fromPath(params.gcnv_target_interval_list).map { targets -> [[id:targets.baseName], targets]}.collect() +ch_gcnv_segmental_duplications = params.gcnv_segmental_duplications ? 
Channel.fromPath(params.gcnv_segmental_duplications).collect() : Channel.value([[id:'null'], []]) +// Initialize mutect2 specific parameters ch_mutect2_target_bed = params.mutect2_target_bed ? Channel.fromPath(params.mutect2_target_bed).collect() : Channel.value([]) @@ -102,11 +109,13 @@ workflow CREATEPANELREFS { ch_fasta, ch_germlinecnvcaller_input, ch_gcnv_ploidy_priors, + ch_gcnv_mappable_regions, + ch_gcnv_segmental_duplications, ch_gcnv_target_bed, ch_gcnv_target_interval_list, ch_gcnv_exclude_bed, ch_gcnv_exclude_interval_list, - params.gcnv_pon_name ) + params.gcnv_model_name ) ch_versions = ch_versions.mix(GERMLINECNVCALLER_COHORT.out.versions) } From a35e3238aadebee1ae427e752337882c50d9dcc2 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 6 Apr 2024 15:22:42 +0200 Subject: [PATCH 115/234] fix lint error --- main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/main.nf b/main.nf index 26bc76b..4328479 100644 --- a/main.nf +++ b/main.nf @@ -25,6 +25,7 @@ params.dict = getGenomeAttribute('dict') params.gcnv_exclude_bed = getGenomeAttribute('gcnv_exclude_bed') params.gcnv_exclude_interval_list = getGenomeAttribute('gcnv_exclude_interval_list') params.gcnv_mappable_regions = getGenomeAttribute('gcnv_mappable_regions') +params.gcnv_segmental_duplications = getGenomeAttribute('gcnv_segmental_duplications') params.gcnv_target_bed = getGenomeAttribute('gcnv_target_bed') params.gcnv_target_interval_list = getGenomeAttribute('gcnv_target_interval_list') params.gcnv_ploidy_priors = getGenomeAttribute('gcnv_ploidy_priors') From 1c62be29a276c01536ccfa6cb02d4afda4acff33 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 8 May 2024 17:10:13 +0200 Subject: [PATCH 116/234] forgotten merge conflicts --- nextflow.config | 7 ------- 1 file changed, 7 deletions(-) diff --git a/nextflow.config b/nextflow.config index c3985ff..67fee20 100644 --- a/nextflow.config +++ b/nextflow.config @@ -111,11 +111,7 @@ profiles 
{ podman.enabled = false shifter.enabled = false charliecloud.enabled = false -<<<<<<< HEAD conda.channels = ['conda-forge', 'bioconda', 'defaults'] -======= - channels = ['conda-forge', 'bioconda', 'defaults'] ->>>>>>> dev apptainer.enabled = false } mamba { @@ -187,7 +183,6 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false -<<<<<<< HEAD } wave { apptainer.ociAutoPull = true @@ -195,8 +190,6 @@ profiles { wave.enabled = true wave.freeze = true wave.strategy = 'conda,container' -======= ->>>>>>> dev } gitpod { executor.name = 'local' From 1d7fccabfc4df52de92165bfdb57d433c8988381 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 8 May 2024 17:13:36 +0200 Subject: [PATCH 117/234] fix linting --- .nf-core.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.nf-core.yml b/.nf-core.yml index fcea61c..6f461fe 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -7,3 +7,4 @@ lint: files_unchanged: - .gitattributes - .gitignore + modules_config: False From 6268672a562987226b58e8f871c33ea738033f11 Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Wed, 8 May 2024 17:22:59 +0200 Subject: [PATCH 118/234] Apply suggestions from code review --- workflows/createpanelrefs.nf | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 09cc50a..5ab0497 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -161,25 +161,18 @@ workflow CREATEPANELREFS { // Collate and save software versions // softwareVersionsToYAML(ch_versions) - .collectFile( - storeDir: "${params.outdir}/pipeline_info", - name: 'nf_core_pipeline_software_mqc_versions.yml', - sort: true, - newLine: true - ).set { ch_collated_versions } + .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true) + .set { ch_collated_versions } // // MODULE: MultiQC // - ch_multiqc_config = 
Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) MULTIQC ( ch_multiqc_files.collect(), From 97ef01db1f342d3f7e800f14048ff4b0579967bf Mon Sep 17 00:00:00 2001 From: jemten Date: Mon, 10 Jun 2024 16:06:15 +0200 Subject: [PATCH 119/234] updates the default args for gens --- CHANGELOG.md | 1 + conf/modules/gens_pon.config | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bacac6a..733f7cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ Initial release of nf-core/createpanelrefs, created with the 
[nf-core](https://n - [#19](https://github.com/nf-core/createpanelrefs/pull/19) - Updates germlinecnvcaller subworkflow to handle exome samples - [#24](https://github.com/nf-core/createpanelrefs/pull/24) - Updates germlinecnvcaller subworkflow to use mappability and segmental duplications track - [#24](https://github.com/nf-core/createpanelrefs/pull/24) - Updates germlinecnvcaller and gens subworkflows to use custom names for panel of normals. +- [#28](https://github.com/nf-core/createpanelrefs/pull/28) - Updates default args for gens subworkflow ### `Fixed` diff --git a/conf/modules/gens_pon.config b/conf/modules/gens_pon.config index 5ea92b5..e4ed2cc 100644 --- a/conf/modules/gens_pon.config +++ b/conf/modules/gens_pon.config @@ -52,8 +52,8 @@ process { } withName: '.*GENS_PON:GATK4_CREATEREADCOUNTPANELOFNORMALS' { - ext.args = { ["--minimum-interval-median-percentile 10.0", - "--maximum-chunk-size 29349635"].join(" ")} + ext.args = { ["--minimum-interval-median-percentile 5.0", + "--maximum-chunk-size 167772150"].join(" ")} publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/gens_pon/createreadcountpanelofnormals" }, From eb79a2f1cabf08f2631abcf2a0f75fd3c6fcdfd9 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 11 Jun 2024 16:28:17 +0200 Subject: [PATCH 120/234] update modules --- modules.json | 30 +++++----- modules/nf-core/cnvkit/batch/environment.yml | 5 +- modules/nf-core/cnvkit/batch/main.nf | 11 +++- modules/nf-core/cnvkit/batch/meta.yml | 8 ++- .../gatk4/annotateintervals/environment.yml | 2 +- .../nf-core/gatk4/annotateintervals/main.nf | 4 +- .../annotateintervals/tests/main.nf.test.snap | 58 +++++++++++++------ .../gatk4/bedtointervallist/environment.yml | 2 +- .../nf-core/gatk4/bedtointervallist/main.nf | 4 +- .../gatk4/collectreadcounts/environment.yml | 2 +- .../nf-core/gatk4/collectreadcounts/main.nf | 4 +- .../environment.yml | 2 +- 
.../createreadcountpanelofnormals/main.nf | 4 +- .../environment.yml | 5 -- .../determinegermlinecontigploidy/main.nf | 4 +- .../gatk4/filterintervals/environment.yml | 2 +- modules/nf-core/gatk4/filterintervals/main.nf | 4 +- .../gatk4/germlinecnvcaller/environment.yml | 5 -- .../nf-core/gatk4/germlinecnvcaller/main.nf | 6 +- .../gatk4/intervallisttools/environment.yml | 2 +- .../nf-core/gatk4/intervallisttools/main.nf | 4 +- .../gatk4/preprocessintervals/environment.yml | 2 +- .../nf-core/gatk4/preprocessintervals/main.nf | 4 +- modules/nf-core/multiqc/environment.yml | 2 +- modules/nf-core/multiqc/main.nf | 4 +- .../nf-core/multiqc/tests/main.nf.test.snap | 18 +++--- .../nf-core/samtools/faidx/environment.yml | 5 +- modules/nf-core/samtools/faidx/main.nf | 4 +- modules/nf-core/samtools/faidx/meta.yml | 4 ++ .../nf-core/samtools/index/environment.yml | 3 +- modules/nf-core/samtools/index/main.nf | 4 +- .../nf-core/samtools/index/tests/main.nf.test | 36 ++++++------ .../samtools/index/tests/main.nf.test.snap | 54 +++++++++++++++-- 33 files changed, 196 insertions(+), 112 deletions(-) delete mode 100644 modules/nf-core/gatk4/determinegermlinecontigploidy/environment.yml delete mode 100644 modules/nf-core/gatk4/germlinecnvcaller/environment.yml diff --git a/modules.json b/modules.json index 2d4ca0a..e6a9a92 100644 --- a/modules.json +++ b/modules.json @@ -7,27 +7,27 @@ "nf-core": { "cnvkit/batch": { "branch": "master", - "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "git_sha": "f53b071421340e6fac0806c86ba030e578e94826", "installed_by": ["modules"] }, "gatk4/annotateintervals": { "branch": "master", - "git_sha": "42ae163c3c6eb23646189c30c07a889ad39c9b0e", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/bedtointervallist": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "d3f215802f696f7993f25c759781d2db91232015", "installed_by": ["modules"] }, 
"gatk4/collectreadcounts": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/createreadcountpanelofnormals": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/createsomaticpanelofnormals": { @@ -37,12 +37,12 @@ }, "gatk4/determinegermlinecontigploidy": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "60a7dbae179bcfa24c10294cc9a07423a239c19a", "installed_by": ["modules"] }, "gatk4/filterintervals": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/genomicsdbimport": { @@ -52,32 +52,32 @@ }, "gatk4/germlinecnvcaller": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "cf607b7749da0a8f5ca2a1e31233e13e3159e2fe", "installed_by": ["modules"] }, "gatk4/indexfeaturefile": { "branch": "master", - "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", + "git_sha": "108e1f5bafc045ac19890852a41e8d95ae12aa67", "installed_by": ["modules"] }, "gatk4/intervallisttools": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "afb604624abcc2fc67a43d70a2de369a50d16105", "installed_by": ["modules"] }, "gatk4/mutect2": { "branch": "master", - "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", + "git_sha": "5fd04feb37b58caa6a54d41e38c80066bdf71056", "installed_by": ["bam_create_som_pon_gatk"] }, "gatk4/preprocessintervals": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", + "git_sha": 
"8f2062e7b4185590fb9f43c275381a31a6544fc0", "installed_by": ["modules"] }, "picard/createsequencedictionary": { @@ -87,12 +87,12 @@ }, "samtools/faidx": { "branch": "master", - "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", "installed_by": ["modules"] }, "samtools/index": { "branch": "master", - "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", "installed_by": ["modules"] } } diff --git a/modules/nf-core/cnvkit/batch/environment.yml b/modules/nf-core/cnvkit/batch/environment.yml index a73ba50..10c5d6b 100644 --- a/modules/nf-core/cnvkit/batch/environment.yml +++ b/modules/nf-core/cnvkit/batch/environment.yml @@ -1,8 +1,11 @@ name: cnvkit_batch + channels: - conda-forge - bioconda - defaults + dependencies: - bioconda::cnvkit=0.9.10 - - bioconda::samtools=1.18 + - bioconda::htslib=1.19.1 + - bioconda::samtools=1.19.2 diff --git a/modules/nf-core/cnvkit/batch/main.nf b/modules/nf-core/cnvkit/batch/main.nf index 33a1dfa..9e8aafa 100644 --- a/modules/nf-core/cnvkit/batch/main.nf +++ b/modules/nf-core/cnvkit/batch/main.nf @@ -4,8 +4,8 @@ process CNVKIT_BATCH { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:f0ad4c1f9b5c0749c12c7a93277ad6f4319b6b72-0' : - 'biocontainers/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:f0ad4c1f9b5c0749c12c7a93277ad6f4319b6b72-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:c94363856059151a2974dc501fb07a0360cc60a3-0' : + 'biocontainers/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:c94363856059151a2974dc501fb07a0360cc60a3-0' }" input: tuple val(meta), path(tumor), path(normal) @@ -32,6 +32,7 @@ process CNVKIT_BATCH { def tumor_exists = tumor ? 
true : false def normal_exists = normal ? true : false + def reference_exists = reference ? true : false // execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false @@ -62,6 +63,10 @@ process CNVKIT_BATCH { else { normal_args = normal_prefix ? "--normal $normal_out" : "" } + if (reference_exists){ + fasta_args = "" + normal_args = "" + } } // generation of panel of normals @@ -73,7 +78,7 @@ process CNVKIT_BATCH { tumor_out = "" } - def target_args = targets ? "--targets $targets" : "" + def target_args = targets && !reference_exists ? "--targets $targets" : "" def reference_args = reference ? "--reference $reference" : "" def samtools_cram_convert = '' diff --git a/modules/nf-core/cnvkit/batch/meta.yml b/modules/nf-core/cnvkit/batch/meta.yml index 4f88ba3..f14efe5 100644 --- a/modules/nf-core/cnvkit/batch/meta.yml +++ b/modules/nf-core/cnvkit/batch/meta.yml @@ -18,7 +18,7 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - tumour: + - tumor: type: file description: | Input tumour sample bam file (or cram) @@ -49,7 +49,7 @@ input: description: | Groovy Map containing information about target file e.g. 
[ id:'test' ] - - targetfile: + - targets: type: file description: | Input target bed file @@ -62,6 +62,10 @@ input: type: file description: | Input reference cnn-file (only for germline and tumor-only running) + - panel_of_normals: + type: file + description: | + Input panel of normals file output: - meta: type: map diff --git a/modules/nf-core/gatk4/annotateintervals/environment.yml b/modules/nf-core/gatk4/annotateintervals/environment.yml index 6e91596..523753c 100644 --- a/modules/nf-core/gatk4/annotateintervals/environment.yml +++ b/modules/nf-core/gatk4/annotateintervals/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::gatk4=4.4.0.0 + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/annotateintervals/main.nf b/modules/nf-core/gatk4/annotateintervals/main.nf index da7acd0..dfe9dc2 100644 --- a/modules/nf-core/gatk4/annotateintervals/main.nf +++ b/modules/nf-core/gatk4/annotateintervals/main.nf @@ -4,8 +4,8 @@ process GATK4_ANNOTATEINTERVALS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(intervals) diff --git a/modules/nf-core/gatk4/annotateintervals/tests/main.nf.test.snap b/modules/nf-core/gatk4/annotateintervals/tests/main.nf.test.snap index 16f81c3..749f6fb 100644 --- a/modules/nf-core/gatk4/annotateintervals/tests/main.nf.test.snap +++ b/modules/nf-core/gatk4/annotateintervals/tests/main.nf.test.snap @@ -12,7 +12,7 @@ ] ], "1": [ - "versions.yml:md5,e8bc10e7a98c24f21dd84b52f6e09d79" + "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" ], "annotated_intervals": [ [ @@ -24,11 +24,15 @@ ] ], "versions": [ - "versions.yml:md5,e8bc10e7a98c24f21dd84b52f6e09d79" + "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" ] } ], - "timestamp": "2023-12-31T01:23:13.232383464" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T16:07:36.32193" }, "homo_sapiens genome [interval_list]": { "content": [ @@ -43,7 +47,7 @@ ] ], "1": [ - "versions.yml:md5,e8bc10e7a98c24f21dd84b52f6e09d79" + "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" ], "annotated_intervals": [ [ @@ -55,11 +59,15 @@ ] ], "versions": [ - "versions.yml:md5,e8bc10e7a98c24f21dd84b52f6e09d79" + "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" ] } ], - "timestamp": "2023-12-31T01:23:44.26211215" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T16:08:12.533176" }, "homo_sapiens genome multi-interval [bed]": { "content": [ @@ -74,7 +82,7 @@ ] ], "1": [ - "versions.yml:md5,e8bc10e7a98c24f21dd84b52f6e09d79" + "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" ], "annotated_intervals": [ [ @@ -86,11 +94,15 @@ ] ], "versions": [ - "versions.yml:md5,e8bc10e7a98c24f21dd84b52f6e09d79" + "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" ] 
} ], - "timestamp": "2023-12-31T01:23:28.481436232" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T16:07:54.643276" }, "homo_sapiens genome [interval_list] mappable_regions": { "content": [ @@ -105,7 +117,7 @@ ] ], "1": [ - "versions.yml:md5,e8bc10e7a98c24f21dd84b52f6e09d79" + "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" ], "annotated_intervals": [ [ @@ -117,20 +129,28 @@ ] ], "versions": [ - "versions.yml:md5,e8bc10e7a98c24f21dd84b52f6e09d79" + "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" ] } ], - "timestamp": "2023-12-31T01:24:00.492783484" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T16:08:31.311782" }, "homo_sapiens genome [bed] - stub": { "content": [ [ "test.tsv", - "versions.yml:md5,e8bc10e7a98c24f21dd84b52f6e09d79" + "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" ] ], - "timestamp": "2023-12-31T01:24:29.485750561" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T16:09:04.54745" }, "homo_sapiens genome [interval_list] duplication_regions": { "content": [ @@ -145,7 +165,7 @@ ] ], "1": [ - "versions.yml:md5,e8bc10e7a98c24f21dd84b52f6e09d79" + "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" ], "annotated_intervals": [ [ @@ -157,10 +177,14 @@ ] ], "versions": [ - "versions.yml:md5,e8bc10e7a98c24f21dd84b52f6e09d79" + "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" ] } ], - "timestamp": "2023-12-31T01:24:15.933897396" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T16:08:49.684766" } } \ No newline at end of file diff --git a/modules/nf-core/gatk4/bedtointervallist/environment.yml b/modules/nf-core/gatk4/bedtointervallist/environment.yml index e7cb428..d6fbe2e 100644 --- a/modules/nf-core/gatk4/bedtointervallist/environment.yml +++ b/modules/nf-core/gatk4/bedtointervallist/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - 
bioconda::gatk4=4.4.0.0 + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/bedtointervallist/main.nf b/modules/nf-core/gatk4/bedtointervallist/main.nf index 88b24b1..68863d6 100644 --- a/modules/nf-core/gatk4/bedtointervallist/main.nf +++ b/modules/nf-core/gatk4/bedtointervallist/main.nf @@ -4,8 +4,8 @@ process GATK4_BEDTOINTERVALLIST { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(bed) diff --git a/modules/nf-core/gatk4/collectreadcounts/environment.yml b/modules/nf-core/gatk4/collectreadcounts/environment.yml index ebc8380..d09cd89 100644 --- a/modules/nf-core/gatk4/collectreadcounts/environment.yml +++ b/modules/nf-core/gatk4/collectreadcounts/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::gatk4=4.4.0.0 + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/collectreadcounts/main.nf b/modules/nf-core/gatk4/collectreadcounts/main.nf index 1cc3694..2c545d2 100644 --- a/modules/nf-core/gatk4/collectreadcounts/main.nf +++ b/modules/nf-core/gatk4/collectreadcounts/main.nf @@ -4,8 +4,8 @@ process GATK4_COLLECTREADCOUNTS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(input), path(input_index), path(intervals) diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml b/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml index ea5b9bf..b4f4e77 100644 --- a/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::gatk4=4.4.0.0 + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf b/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf index 9d32a99..332f9d9 100644 --- a/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf @@ -4,8 +4,8 @@ process GATK4_CREATEREADCOUNTPANELOFNORMALS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(counts) diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/environment.yml b/modules/nf-core/gatk4/determinegermlinecontigploidy/environment.yml deleted file mode 100644 index c30de09..0000000 --- a/modules/nf-core/gatk4/determinegermlinecontigploidy/environment.yml +++ /dev/null @@ -1,5 +0,0 @@ -name: gatk4_determinegermlinecontigploidy -channels: - - conda-forge - - bioconda - - defaults diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf b/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf index 5af101f..71a67f7 100644 --- a/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf @@ -4,7 +4,7 @@ process GATK4_DETERMINEGERMLINECONTIGPLOIDY { label 'process_single' //Conda is not supported at the moment: https://github.com/broadinstitute/gatk/issues/7811 - container "nf-core/gatk:4.4.0.0" //Biocontainers is missing a package + container "nf-core/gatk:4.5.0.0" //Biocontainers is missing a package input: tuple val(meta), path(counts), path(bed), path(exclude_beds) @@ -39,6 +39,8 @@ process GATK4_DETERMINEGERMLINECONTIGPLOIDY { avail_mem = (task.memory.mega*0.8).intValue() } """ + export THEANO_FLAGS="base_compiledir=\$PWD" + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ DetermineGermlineContigPloidy \\ ${input_list} \\ diff --git a/modules/nf-core/gatk4/filterintervals/environment.yml b/modules/nf-core/gatk4/filterintervals/environment.yml index a1d59f2..853995e 100644 --- a/modules/nf-core/gatk4/filterintervals/environment.yml +++ b/modules/nf-core/gatk4/filterintervals/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - 
bioconda::gatk4=4.4.0.0 + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/filterintervals/main.nf b/modules/nf-core/gatk4/filterintervals/main.nf index a6d4702..bf1f3a0 100644 --- a/modules/nf-core/gatk4/filterintervals/main.nf +++ b/modules/nf-core/gatk4/filterintervals/main.nf @@ -4,8 +4,8 @@ process GATK4_FILTERINTERVALS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(intervals) diff --git a/modules/nf-core/gatk4/germlinecnvcaller/environment.yml b/modules/nf-core/gatk4/germlinecnvcaller/environment.yml deleted file mode 100644 index 02ce4dd..0000000 --- a/modules/nf-core/gatk4/germlinecnvcaller/environment.yml +++ /dev/null @@ -1,5 +0,0 @@ -name: gatk4_germlinecnvcaller -channels: - - conda-forge - - bioconda - - defaults diff --git a/modules/nf-core/gatk4/germlinecnvcaller/main.nf b/modules/nf-core/gatk4/germlinecnvcaller/main.nf index f42ceb6..cd1916e 100644 --- a/modules/nf-core/gatk4/germlinecnvcaller/main.nf +++ b/modules/nf-core/gatk4/germlinecnvcaller/main.nf @@ -3,7 +3,7 @@ process GATK4_GERMLINECNVCALLER { label 'process_single' //Conda is not supported at the moment: https://github.com/broadinstitute/gatk/issues/7811 - container "nf-core/gatk:4.4.0.0" //Biocontainers is missing a package + container "nf-core/gatk:4.5.0.0" //Biocontainers is missing a package input: tuple val(meta), path(tsv), path(intervals), path(ploidy), path(model) @@ -37,7 +37,9 @@ process GATK4_GERMLINECNVCALLER { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g -XX:-UsePerfData" \\ + export THEANO_FLAGS="base_compiledir=\$PWD" + + gatk 
--java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ GermlineCNVCaller \\ $input_list \\ $ploidy_command \\ diff --git a/modules/nf-core/gatk4/intervallisttools/environment.yml b/modules/nf-core/gatk4/intervallisttools/environment.yml index b70e5e1..a4026f9 100644 --- a/modules/nf-core/gatk4/intervallisttools/environment.yml +++ b/modules/nf-core/gatk4/intervallisttools/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::gatk4=4.4.0.0 + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/intervallisttools/main.nf b/modules/nf-core/gatk4/intervallisttools/main.nf index 10baa75..400fa03 100644 --- a/modules/nf-core/gatk4/intervallisttools/main.nf +++ b/modules/nf-core/gatk4/intervallisttools/main.nf @@ -4,8 +4,8 @@ process GATK4_INTERVALLISTTOOLS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(intervals) diff --git a/modules/nf-core/gatk4/preprocessintervals/environment.yml b/modules/nf-core/gatk4/preprocessintervals/environment.yml index 542d9ef..ec0b09e 100644 --- a/modules/nf-core/gatk4/preprocessintervals/environment.yml +++ b/modules/nf-core/gatk4/preprocessintervals/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::gatk4=4.4.0.0 + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/preprocessintervals/main.nf b/modules/nf-core/gatk4/preprocessintervals/main.nf index 61b281d..dffc4bb 100644 --- a/modules/nf-core/gatk4/preprocessintervals/main.nf +++ b/modules/nf-core/gatk4/preprocessintervals/main.nf @@ -4,8 +4,8 @@ process GATK4_PREPROCESSINTERVALS { conda 
"${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index ca39fb6..72e598b 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::multiqc=1.21 + - bioconda::multiqc=1.22.2 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 47ac352..e59efef 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : - 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.22.2--pyhdfd78af_0' : + 'biocontainers/multiqc:1.22.2--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index bfebd80..a170c31 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,ddbc971a8307f9b9b7b973714cde29d0" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-29T08:48:55.657331" + "timestamp": "2024-06-10T11:50:10.874341679" }, "multiqc_stub": { "content": [ @@ -17,25 +17,25 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,ddbc971a8307f9b9b7b973714cde29d0" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-29T08:49:49.071937" + "timestamp": "2024-06-10T11:50:49.271943761" }, "multiqc_versions_config": { "content": [ [ - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,ddbc971a8307f9b9b7b973714cde29d0" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-29T08:49:25.457567" + "timestamp": "2024-06-10T11:50:34.046706025" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml index 01ccbcc..f8450fa 100644 --- a/modules/nf-core/samtools/faidx/environment.yml +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -1,7 +1,10 @@ name: samtools_faidx + channels: - conda-forge - bioconda - defaults + dependencies: - 
- bioconda::samtools=1.18 + - bioconda::htslib=1.20 + - bioconda::samtools=1.20 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index d346162..bdcdbc9 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_FAIDX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : - 'biocontainers/samtools:1.18--h50ea8bc_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index e189af2..f3c25de 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -39,6 +39,10 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] + - fa: + type: file + description: FASTA file + pattern: "*.{fa}" - fai: type: file description: FASTA index file diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml index 296ed99..260d516 100644 --- a/modules/nf-core/samtools/index/environment.yml +++ b/modules/nf-core/samtools/index/environment.yml @@ -4,4 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::samtools=1.18 + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 8ad18fd..b523c21 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_INDEX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : - 'biocontainers/samtools:1.18--h50ea8bc_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test index c76a916..bb7756d 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -8,7 +8,7 @@ nextflow_process { tag "samtools" tag "samtools/index" - test("sarscov2 [BAI]") { + test("bai") { when { params { @@ -16,10 +16,10 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) """ } } @@ -28,12 +28,12 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot(process.out.bai).match("bai") }, - { assert path(process.out.versions.get(0)).getText().contains("samtools") } + { assert snapshot(process.out.versions).match("bai_versions") } ) } } - test("homo_sapiens [CRAI]") { + test("crai") { when { params { @@ -41,10 +41,10 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) """ } } @@ -53,12 +53,12 @@ nextflow_process { assertAll ( { assert process.success }, { assert 
snapshot(process.out.crai).match("crai") }, - { assert path(process.out.versions.get(0)).getText().contains("samtools") } + { assert snapshot(process.out.versions).match("crai_versions") } ) } } - test("homo_sapiens [CSI]") { + test("csi") { config "./csi.nextflow.config" @@ -68,10 +68,10 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) """ } } @@ -80,7 +80,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert path(process.out.csi.get(0).get(1)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("samtools") } + { assert snapshot(process.out.versions).match("csi_versions") } ) } } diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap index b3baee7..52756e8 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -1,28 +1,74 @@ { + "crai_versions": { + "content": [ + [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:04.203740976" + }, + "csi_versions": { + "content": [ + [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:09.57475878" + }, "crai": { "content": [ [ [ { - "id": "test" + "id": "test", + "single_end": false }, "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" ] ] ], - "timestamp": "2023-11-15T15:17:37.30801" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": 
"2024-02-12T18:41:38.446424" }, "bai": { "content": [ [ [ { - "id": "test" + "id": "test", + "single_end": false }, "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" ] ] ], - "timestamp": "2023-11-15T15:17:30.869234" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:40:46.579747" + }, + "bai_versions": { + "content": [ + [ + "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:41:57.929287369" } } \ No newline at end of file From 6b48b1310cd312b9e50d9433dfc3f3542c9e5f7c Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 11 Jun 2024 16:54:59 +0200 Subject: [PATCH 121/234] more updates --- .../cnvkit/batch/tests/batch_hybrid.config | 6 + .../cnvkit/batch/tests/batch_pon.config | 6 + .../batch/tests/batch_tumouronly.config | 6 + .../cnvkit/batch/tests/batch_wgs.config | 6 + .../nf-core/cnvkit/batch/tests/main.nf.test | 284 ++++++++++++++ .../cnvkit/batch/tests/main.nf.test.snap | 86 +++++ modules/nf-core/cnvkit/batch/tests/tags.yml | 2 + .../bedtointervallist/tests/main.nf.test | 38 ++ .../bedtointervallist/tests/main.nf.test.snap | 35 ++ .../gatk4/bedtointervallist/tests/tags.yml | 2 + .../determinegermlinecontigploidy/README.md | 9 + .../nf-core/gatk4/germlinecnvcaller/README.md | 9 + .../gatk4/indexfeaturefile/tests/main.nf.test | 105 +++++ .../indexfeaturefile/tests/main.nf.test.snap | 132 +++++++ .../gatk4/indexfeaturefile/tests/tags.yml | 2 + .../intervallisttools/tests/main.nf.test | 72 ++++ .../intervallisttools/tests/main.nf.test.snap | 88 +++++ .../intervallisttools/tests/nextflow.config | 5 + .../gatk4/intervallisttools/tests/tags.yml | 2 + .../nf-core/gatk4/mutect2/tests/f1r2.config | 3 + .../nf-core/gatk4/mutect2/tests/main.nf.test | 360 ++++++++++++++++++ .../gatk4/mutect2/tests/main.nf.test.snap | 204 ++++++++++ 
.../nf-core/gatk4/mutect2/tests/mito.config | 3 + .../nf-core/gatk4/mutect2/tests/pair.config | 3 + modules/nf-core/gatk4/mutect2/tests/tags.yml | 2 + .../nf-core/samtools/faidx/tests/main.nf.test | 122 ++++++ .../samtools/faidx/tests/main.nf.test.snap | 249 ++++++++++++ .../samtools/faidx/tests/nextflow.config | 7 + .../samtools/faidx/tests/nextflow2.config | 6 + modules/nf-core/samtools/faidx/tests/tags.yml | 2 + tests/pipeline/cnvkit.nf.test.snap | 20 +- tests/test_assets/gens_software_versions.yaml | 10 +- .../germlinecnvcaller_software_versions.yaml | 18 +- 33 files changed, 1884 insertions(+), 20 deletions(-) create mode 100644 modules/nf-core/cnvkit/batch/tests/batch_hybrid.config create mode 100644 modules/nf-core/cnvkit/batch/tests/batch_pon.config create mode 100644 modules/nf-core/cnvkit/batch/tests/batch_tumouronly.config create mode 100644 modules/nf-core/cnvkit/batch/tests/batch_wgs.config create mode 100644 modules/nf-core/cnvkit/batch/tests/main.nf.test create mode 100644 modules/nf-core/cnvkit/batch/tests/main.nf.test.snap create mode 100644 modules/nf-core/cnvkit/batch/tests/tags.yml create mode 100644 modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test create mode 100644 modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap create mode 100644 modules/nf-core/gatk4/bedtointervallist/tests/tags.yml create mode 100644 modules/nf-core/gatk4/determinegermlinecontigploidy/README.md create mode 100644 modules/nf-core/gatk4/germlinecnvcaller/README.md create mode 100644 modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test create mode 100644 modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test.snap create mode 100644 modules/nf-core/gatk4/indexfeaturefile/tests/tags.yml create mode 100644 modules/nf-core/gatk4/intervallisttools/tests/main.nf.test create mode 100644 modules/nf-core/gatk4/intervallisttools/tests/main.nf.test.snap create mode 100644 modules/nf-core/gatk4/intervallisttools/tests/nextflow.config create mode 100644 
modules/nf-core/gatk4/intervallisttools/tests/tags.yml create mode 100644 modules/nf-core/gatk4/mutect2/tests/f1r2.config create mode 100644 modules/nf-core/gatk4/mutect2/tests/main.nf.test create mode 100644 modules/nf-core/gatk4/mutect2/tests/main.nf.test.snap create mode 100644 modules/nf-core/gatk4/mutect2/tests/mito.config create mode 100644 modules/nf-core/gatk4/mutect2/tests/pair.config create mode 100644 modules/nf-core/gatk4/mutect2/tests/tags.yml create mode 100644 modules/nf-core/samtools/faidx/tests/main.nf.test create mode 100644 modules/nf-core/samtools/faidx/tests/main.nf.test.snap create mode 100644 modules/nf-core/samtools/faidx/tests/nextflow.config create mode 100644 modules/nf-core/samtools/faidx/tests/nextflow2.config create mode 100644 modules/nf-core/samtools/faidx/tests/tags.yml diff --git a/modules/nf-core/cnvkit/batch/tests/batch_hybrid.config b/modules/nf-core/cnvkit/batch/tests/batch_hybrid.config new file mode 100644 index 0000000..07a21b1 --- /dev/null +++ b/modules/nf-core/cnvkit/batch/tests/batch_hybrid.config @@ -0,0 +1,6 @@ +process { + + withName: CNVKIT_BATCH { + ext.args = '--output-reference reference.cnn' + } +} diff --git a/modules/nf-core/cnvkit/batch/tests/batch_pon.config b/modules/nf-core/cnvkit/batch/tests/batch_pon.config new file mode 100644 index 0000000..3897370 --- /dev/null +++ b/modules/nf-core/cnvkit/batch/tests/batch_pon.config @@ -0,0 +1,6 @@ +process { + + withName: CNVKIT_BATCH { + ext.args = '--method wgs --output-reference panel_of_normals.cnn' + } +} diff --git a/modules/nf-core/cnvkit/batch/tests/batch_tumouronly.config b/modules/nf-core/cnvkit/batch/tests/batch_tumouronly.config new file mode 100644 index 0000000..91074ff --- /dev/null +++ b/modules/nf-core/cnvkit/batch/tests/batch_tumouronly.config @@ -0,0 +1,6 @@ +process { + + withName: CNVKIT_BATCH { + ext.args = '--method wgs' + } +} diff --git a/modules/nf-core/cnvkit/batch/tests/batch_wgs.config 
b/modules/nf-core/cnvkit/batch/tests/batch_wgs.config new file mode 100644 index 0000000..48fc879 --- /dev/null +++ b/modules/nf-core/cnvkit/batch/tests/batch_wgs.config @@ -0,0 +1,6 @@ +process { + + withName: CNVKIT_BATCH { + ext.args = '--output-reference reference.cnn --method wgs' + } +} diff --git a/modules/nf-core/cnvkit/batch/tests/main.nf.test b/modules/nf-core/cnvkit/batch/tests/main.nf.test new file mode 100644 index 0000000..b2c0a9b --- /dev/null +++ b/modules/nf-core/cnvkit/batch/tests/main.nf.test @@ -0,0 +1,284 @@ +nextflow_process { + + name "Test Process CNVKIT_BATCH" + script "../main.nf" + process "CNVKIT_BATCH" + + tag "modules" + tag "modules_nfcore" + tag "cnvkit" + tag "cnvkit/batch" + + test("cnvkit batch hybrid mode - bam") { + + config "./batch_hybrid.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) + ] + input[1] = [[:],file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[2] = [[:],[]] + input[3] = [[:],file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true)] + input[4] = [[:],[]] + input[5] = false + """ + } + } + + then { + println process.out.bed[0][1] + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match() } + ) + } + + } + + test("cnvkit batch wgs - bam") { + + config "./batch_wgs.config" + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + input[1] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + input[2] = [[:],[]] + input[3] = 
[[:],[]] + input[4] = [[:],[]] + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match() } + ) + } + + } + + test("cnvkit batch wgs - cram") { + + config "./batch_wgs.config" + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) + ] + input[1] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + input[2] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)] + input[3] = [[:],[]] + input[4] = [[:],[]] + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match() } + ) + } + + } + + test("cnvkit batch tumouronly mode - bam") { + + config "./batch_tumouronly.config" + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true), + [] + ] + input[1] = [[:],[]] + input[2] = [[:],[]] + input[3] = [[:],[]] + input[4] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true)] + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match() } + ) + } + + } + + test("cnvkit batch tumouronly mode - cram") { + + config "./batch_tumouronly.config" + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true), + [] + ] + input[1] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + input[2] = [[:],[]] + 
input[3] = [[:],[]] + input[4] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true)] + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match() } + ) + } + + } + + test("cnvkit batch germline mode - cram") { + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + [], + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true) + ] + input[1] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)] + input[2] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)] + input[3] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true)] + input[4] = [[:],[]] + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match() } + ) + } + + } + + + test("cnvkit batch germline hybrid mode - bam") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) + ] + input[1] = [[:],file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[2] = [[:],[]] + input[3] = [[:],file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true)] + input[4] = [[:],[]] + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match() } + ) + } + + } + + + test("cnvkit batch pon mode - bam") { + + config "./batch_pon.config" + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + [], + 
[file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + ] + ] + input[1] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + input[2] = [[:],[]] + input[3] = [[:],[]] + input[4] = [[:],[]] + input[5] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match() } + ) + } + + } + + + test("cnvkit batch - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) + ] + input[1] = [[:],file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[2] = [[:],[]] + input[3] = [[:],file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true)] + input[4] = [[:],[]] + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.bed).match() } + ) + } + + } + +} diff --git a/modules/nf-core/cnvkit/batch/tests/main.nf.test.snap b/modules/nf-core/cnvkit/batch/tests/main.nf.test.snap new file mode 100644 index 0000000..5d7cb14 --- /dev/null +++ b/modules/nf-core/cnvkit/batch/tests/main.nf.test.snap @@ -0,0 +1,86 @@ +{ + "cnvkit batch tumouronly mode - bam": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T14:30:55.150317561" + }, + "cnvkit batch tumouronly mode - cram": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T14:31:31.039652656" + }, + "cnvkit batch - bam - stub": { + "content": [ + [ + [ + { + "id": "test" + }, + [ + 
"baits.antitarget.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "baits.target.bed:md5,26d25ff2d6c45b6d92169b3559c6acdb" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T14:33:25.790391941" + }, + "cnvkit batch wgs - bam": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T14:30:10.677690173" + }, + "cnvkit batch germline hybrid mode - bam": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T14:32:50.217076532" + }, + "cnvkit batch hybrid mode - bam": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T13:54:41.69602289" + }, + "cnvkit batch wgs - cram": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T14:30:27.271060826" + }, + "cnvkit batch pon mode - bam": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T14:33:06.391306794" + }, + "cnvkit batch germline mode - cram": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T14:32:23.38326412" + } +} \ No newline at end of file diff --git a/modules/nf-core/cnvkit/batch/tests/tags.yml b/modules/nf-core/cnvkit/batch/tests/tags.yml new file mode 100644 index 0000000..1c8565c --- /dev/null +++ b/modules/nf-core/cnvkit/batch/tests/tags.yml @@ -0,0 +1,2 @@ +cnvkit/batch: + - "modules/nf-core/cnvkit/batch/**" diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test new file mode 100644 index 0000000..2289f73 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process GATK4_BEDTOINTERVALLIST" + script "../main.nf" + process "GATK4_BEDTOINTERVALLIST" + + tag 
"modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/bedtointervallist" + + test("test_gatk4_bedtointervallist") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + [file(params.modules_testdata_base_path + + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + ] + input[1] = [ [ id:'dict' ], // meta map + [file(params.modules_testdata_base_path + + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap new file mode 100644 index 0000000..48c322f --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "test_gatk4_bedtointervallist": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.interval_list:md5,e51101c9357fb2d59fd30e370eefa39c" + ] + ], + "1": [ + "versions.yml:md5,29a18c36f27584eb5a5f2f5457088b3b" + ], + "interval_list": [ + [ + { + "id": "test" + }, + "test.interval_list:md5,e51101c9357fb2d59fd30e370eefa39c" + ] + ], + "versions": [ + "versions.yml:md5,29a18c36f27584eb5a5f2f5457088b3b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-19T14:20:12.168775" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml b/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml new file mode 100644 index 0000000..b4d54f1 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/bedtointervallist: + - "modules/nf-core/gatk4/bedtointervallist/**" diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/README.md b/modules/nf-core/gatk4/determinegermlinecontigploidy/README.md new file mode 100644 index 
0000000..c6a4545 --- /dev/null +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/README.md @@ -0,0 +1,9 @@ +# Conda is not supported at the moment + +The [bioconda](https://bioconda.github.io/recipes/gatk4/README.html) recipe is not fully working as expected, cf [github issue](https://github.com/broadinstitute/gatk/issues/7811) + +Hence, we are using the docker container provided by the authors of the tool: + +- [broadinstitute/gatk](https://hub.docker.com/r/broadinstitute/gatk) + +This image is mirrored on the [nf-core quay.io](https://quay.io/repository/nf-core/gatk) for convenience. diff --git a/modules/nf-core/gatk4/germlinecnvcaller/README.md b/modules/nf-core/gatk4/germlinecnvcaller/README.md new file mode 100644 index 0000000..c6a4545 --- /dev/null +++ b/modules/nf-core/gatk4/germlinecnvcaller/README.md @@ -0,0 +1,9 @@ +# Conda is not supported at the moment + +The [bioconda](https://bioconda.github.io/recipes/gatk4/README.html) recipe is not fully working as expected, cf [github issue](https://github.com/broadinstitute/gatk/issues/7811) + +Hence, we are using the docker container provided by the authors of the tool: + +- [broadinstitute/gatk](https://hub.docker.com/r/broadinstitute/gatk) + +This image is mirrored on the [nf-core quay.io](https://quay.io/repository/nf-core/gatk) for convenience. 
diff --git a/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test b/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test new file mode 100644 index 0000000..ee99a54 --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test @@ -0,0 +1,105 @@ +// nf-core modules test gatk4/indexfeaturefile +nextflow_process { + + name "Test Process GATK4_INDEXFEATUREFILE" + script "../main.nf" + process "GATK4_INDEXFEATUREFILE" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/indexfeaturefile" + + test("test_gatk4_indexfeaturefile_bed") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert snapshot(file(process.out.index.get(0).get(1)).name).match("geneome.bed.idx") }, + ) + } + + } + + test("test_gatk4_indexfeaturefile_bed_gz") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_bed_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(file(process.out.index.get(0).get(1)).name).match("genome.bed.gz.tbi") }, + ) + } + + } + + test("test_gatk4_indexfeaturefile_vcf") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert snapshot(file(process.out.index.get(0).get(1)).name).match("test.genome.vcf.idx") }, + ) + } + + } + + test("test_gatk4_indexfeaturefile_vcf_gz") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + 
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(file(process.out.index.get(0).get(1)).name).match("test.genome.vcf.gz.tbi") }, + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test.snap b/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test.snap new file mode 100644 index 0000000..fc19333 --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test.snap @@ -0,0 +1,132 @@ +{ + "genome.bed.gz.tbi": { + "content": [ + "genome.bed.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:57:03.068725" + }, + "test_gatk4_indexfeaturefile_vcf": { + "content": [ + [ + "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:57:20.602472" + }, + "geneome.bed.idx": { + "content": [ + "genome.bed.idx" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:56:46.988441" + }, + "test.genome.vcf.gz.tbi": { + "content": [ + "test.genome.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:57:51.898472" + }, + "test_gatk4_indexfeaturefile_bed_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.bed.gz.tbi:md5,4bc51e2351a6e83f20e13be75861f941" + ] + ], + "1": [ + "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + ], + "index": [ + [ + { + "id": "test" + }, + "genome.bed.gz.tbi:md5,4bc51e2351a6e83f20e13be75861f941" + ] + ], + "versions": [ + "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:57:03.058351" + }, + "test_gatk4_indexfeaturefile_vcf_gz": { + "content": [ + { + "0": [ 
+ [ + { + "id": "test" + }, + "test.genome.vcf.gz.tbi:md5,fedd68eaddf8d31257853d9da8325bd3" + ] + ], + "1": [ + "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + ], + "index": [ + [ + { + "id": "test" + }, + "test.genome.vcf.gz.tbi:md5,fedd68eaddf8d31257853d9da8325bd3" + ] + ], + "versions": [ + "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:57:51.861697" + }, + "test.genome.vcf.idx": { + "content": [ + "test.genome.vcf.idx" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:57:20.624337" + }, + "test_gatk4_indexfeaturefile_bed": { + "content": [ + [ + "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:56:46.885162" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/indexfeaturefile/tests/tags.yml b/modules/nf-core/gatk4/indexfeaturefile/tests/tags.yml new file mode 100644 index 0000000..041bd3d --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/indexfeaturefile: + - "modules/nf-core/gatk4/indexfeaturefile/**" diff --git a/modules/nf-core/gatk4/intervallisttools/tests/main.nf.test b/modules/nf-core/gatk4/intervallisttools/tests/main.nf.test new file mode 100644 index 0000000..2891bf9 --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttools/tests/main.nf.test @@ -0,0 +1,72 @@ +nextflow_process { + + name "Test Process GATK4_INTERVALLISTTOOLS" + script "../main.nf" + process "GATK4_INTERVALLISTTOOLS" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/bedtointervallist" + tag "gatk4/intervallisttools" + + setup { + run("GATK4_BEDTOINTERVALLIST") { + script "../../bedtointervallist/main.nf" + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true)] + ] + input[1] = [ + [ id:'dict' ], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true)] + ] + """ + } + } + } + + test("test_gatk4_intervallisttools") { + + when { + process { + """ + input[0] = GATK4_BEDTOINTERVALLIST.out.interval_list + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("test_gatk4_intervallisttools -stub") { + + options "-stub" + + when { + process { + """ + input[0] = GATK4_BEDTOINTERVALLIST.out.interval_list + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/intervallisttools/tests/main.nf.test.snap b/modules/nf-core/gatk4/intervallisttools/tests/main.nf.test.snap new file mode 100644 index 0000000..7718ed0 --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttools/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "test_gatk4_intervallisttools": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "1scattered.interval_list:md5,64f6665f9fbd257e4a300ec602f4e995", + "2scattered.interval_list:md5,f515c3da0c6accfd8e7dc33df50855c5", + "3scattered.interval_list:md5,7a918e8c9211b54334587793e8cbae53", + "4scattered.interval_list:md5,1b93105227a7dc81f07101a1efd31498" + ] + ] + ], + "1": [ + "versions.yml:md5,ff682cc9ad70d65a80280df57b316b03" + ], + "interval_list": [ + [ + { + "id": "test" + }, + [ + "1scattered.interval_list:md5,64f6665f9fbd257e4a300ec602f4e995", + "2scattered.interval_list:md5,f515c3da0c6accfd8e7dc33df50855c5", + "3scattered.interval_list:md5,7a918e8c9211b54334587793e8cbae53", + "4scattered.interval_list:md5,1b93105227a7dc81f07101a1efd31498" + ] + ] + ], + "versions": [ + "versions.yml:md5,ff682cc9ad70d65a80280df57b316b03" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + 
"timestamp": "2024-05-22T21:26:22.252885" + }, + "test_gatk4_intervallisttools -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "1scattered.interval_list:md5,d41d8cd98f00b204e9800998ecf8427e", + "2scattered.interval_list:md5,d41d8cd98f00b204e9800998ecf8427e", + "3scattered.interval_list:md5,d41d8cd98f00b204e9800998ecf8427e", + "4scattered.interval_list:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,ff682cc9ad70d65a80280df57b316b03" + ], + "interval_list": [ + [ + { + "id": "test" + }, + [ + "1scattered.interval_list:md5,d41d8cd98f00b204e9800998ecf8427e", + "2scattered.interval_list:md5,d41d8cd98f00b204e9800998ecf8427e", + "3scattered.interval_list:md5,d41d8cd98f00b204e9800998ecf8427e", + "4scattered.interval_list:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,ff682cc9ad70d65a80280df57b316b03" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T22:15:11.772344" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/intervallisttools/tests/nextflow.config b/modules/nf-core/gatk4/intervallisttools/tests/nextflow.config new file mode 100644 index 0000000..b24b20d --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttools/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GATK4_INTERVALLISTTOOLS { + ext.args = '--SCATTER_COUNT 6 --SUBDIVISION_MODE BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW --UNIQUE true --SORT true' + } +} diff --git a/modules/nf-core/gatk4/intervallisttools/tests/tags.yml b/modules/nf-core/gatk4/intervallisttools/tests/tags.yml new file mode 100644 index 0000000..bf85ff5 --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttools/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/intervallisttools: + - "modules/nf-core/gatk4/intervallisttools/**" diff --git a/modules/nf-core/gatk4/mutect2/tests/f1r2.config b/modules/nf-core/gatk4/mutect2/tests/f1r2.config new file mode 100644 index 
0000000..2d3c8a1 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/tests/f1r2.config @@ -0,0 +1,3 @@ +process { + ext.args = { "--normal-sample $meta.normal_id --f1r2-tar-gz ${meta.id}.f1r2.tar.gz" } +} diff --git a/modules/nf-core/gatk4/mutect2/tests/main.nf.test b/modules/nf-core/gatk4/mutect2/tests/main.nf.test new file mode 100644 index 0000000..d247ee3 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/tests/main.nf.test @@ -0,0 +1,360 @@ +nextflow_process { + + name "Test Process GATK4_MUTECT2" + script "../main.nf" + process "GATK4_MUTECT2" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/mutect2" + + test("tumor_normal_pair") { + config "./pair.config" + when { + process { + """ + input[0] = [ + [ + id:'test', + normal_id:'normal', + tumor_id:'tumour' + ], + [ + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true) + ], + [ + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true) + ], + [] + ] + input[1] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + ] + input[2] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + ] + input[4] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz'], checkIfExists: true) + input[5] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz_tbi'], checkIfExists: true) + input[6] = 
file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true) + input[7] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + process.out.vcf.collect { file(it[1]).getName() }, + process.out.tbi.collect { file(it[1]).getName() }, + process.out.stats, + process.out.f1r2, + process.out.versions, + ).match() + } + ) + } + } + + test("tumor_normal_pair_f1r2") { + config "./f1r2.config" + when { + process { + """ + input[0] = [ + [ + id:'test', + normal_id:'normal', + tumor_id:'tumour' + ], + [ + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true) + ], + [ + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true) + ], + [] + ] + input[1] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + ] + input[2] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + ] + input[4] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz'], checkIfExists: true) + input[5] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz_tbi'], checkIfExists: true) + input[6] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true) + input[7] = 
file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true) + """ + } + } + then { + assertAll( + { assert process.success }, + { + assert snapshot( + process.out.vcf.collect { file(it[1]).getName() }, + process.out.tbi.collect { file(it[1]).getName() }, + process.out.stats, + process.out.f1r2.collect { file(it[1]).getName() }, + process.out.versions + ).match() + } + ) + } + } + test("tumor_single"){ + when { + process { + """ + input[0] = [ + [ id:'test'], + [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true)], + [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)], + [] + ] + input[1] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + ] + input[2] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + ] + input[4] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz'], checkIfExists: true) + input[5] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz_tbi'], checkIfExists: true) + input[6] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true) + input[7] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true) + """ + } + } + then { + assertAll( + { assert process.success }, + { + assert snapshot( + process.out.vcf.collect { file(it[1]).getName() }, + process.out.tbi.collect { file(it[1]).getName() }, + process.out.stats, + process.out.f1r2, + process.out.versions + ).match() + } + ) + } + } + test("cram_input"){ + when { + process{ + """ + input[0] = [ + [ id:'test'], + [ 
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true)], + [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)], + [] + ] + input[1] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + ] + input[2] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + ] + input[4] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz'], checkIfExists: true) + input[5] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz_tbi'], checkIfExists: true) + input[6] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true) + input[7] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true) + """ + } + } + then { + assertAll( + { assert process.success }, + { + assert snapshot( + process.out.vcf.collect { file(it[1]).getName() }, + process.out.tbi.collect { file(it[1]).getName() }, + process.out.stats, + process.out.f1r2, + process.out.versions + ).match() + } + ) + } + } + + test("generate_pon") { + when { + process { + """ + input[0] = [ + [ id:'test'], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true)], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)], + [] + ] + input[1] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + ] + input[2] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + 
] + input[3] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + ] + input[4] = [] + input[5] = [] + input[6] = [] + input[7] = [] + """ + } + } + then { + assertAll( + { assert process.success }, + { + assert snapshot( + process.out.vcf.collect { file(it[1]).getName() }, + process.out.tbi.collect { file(it[1]).getName() }, + process.out.stats, + process.out.f1r2, + process.out.versions + ).match() + } + ) + } + } + + test("mitochondria"){ + when { + process { + """ + input[0] = [ + [ id:'test'], + [ file(params.test_data['homo_sapiens']['illumina']['mitochon_standin_recalibrated_sorted_bam'], checkIfExists: true)], + [ file(params.test_data['homo_sapiens']['illumina']['mitochon_standin_recalibrated_sorted_bam_bai'], checkIfExists: true)], + [ file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)] + ] + input[1] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + ] + input[4] = [] + input[5] = [] + input[6] = [] + input[7] = [] + """ + } + } + then { + assertAll( + { assert process.success }, + { + assert snapshot( + process.out.vcf.collect { file(it[1]).getName() }, + process.out.tbi.collect { file(it[1]).getName() }, + process.out.stats, + process.out.f1r2, + process.out.versions + ).match() + } + ) + } + } + + test("tumor_normal_pair_f1r2_stubs"){ + options "-stub-run" + when { + process { + """ + input[0] = [ + [ + id:'test', + normal_id:'normal', + tumor_id:'tumour' + ], + [ + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true), + 
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true) + ], + [ + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true) + ], + [] + ] + input[1] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + ] + input[2] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + ] + input[4] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz'], checkIfExists: true) + input[5] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz_tbi'], checkIfExists: true) + input[6] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true) + input[7] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true) + """ + } + } + then { + assertAll( + { assert process.success }, + { + assert snapshot( + process.out.vcf.collect { file(it[1]).getName() }, + process.out.tbi.collect { file(it[1]).getName() }, + process.out.stats.collect { file(it[1]).getName() }, + process.out.f1r2.collect { file(it[1]).getName() }, + process.out.versions.collect { file(it[1]).getName() } + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/mutect2/tests/main.nf.test.snap b/modules/nf-core/gatk4/mutect2/tests/main.nf.test.snap new file mode 100644 index 0000000..f047af1 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/tests/main.nf.test.snap @@ -0,0 +1,204 @@ +{ + "tumor_normal_pair_f1r2_stubs": { + "content": [ + [ + "test.vcf.gz" + ], + [ + "test.vcf.gz.tbi" + 
], + [ + "test.vcf.gz.stats" + ], + [ + "test.f1r2.tar.gz" + ], + [ + "h" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-03-21T10:14:45.599103891" + }, + "generate_pon": { + "content": [ + [ + "test.vcf.gz" + ], + [ + "test.vcf.gz.tbi" + ], + [ + [ + { + "id": "test" + }, + "test.vcf.gz.stats:md5,b569ce66bbffe9588b3d221e821023ee" + ] + ], + [ + + ], + [ + "versions.yml:md5,d94731c50c20569fe9896235a843f382" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-03-20T15:57:18.264453766" + }, + "mitochondria": { + "content": [ + [ + "test.vcf.gz" + ], + [ + "test.vcf.gz.tbi" + ], + [ + [ + { + "id": "test" + }, + "test.vcf.gz.stats:md5,4f77301a125913170b8e9e7828b4ca3f" + ] + ], + [ + + ], + [ + "versions.yml:md5,d94731c50c20569fe9896235a843f382" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-03-20T16:05:47.668766905" + }, + "cram_input": { + "content": [ + [ + "test.vcf.gz" + ], + [ + "test.vcf.gz.tbi" + ], + [ + [ + { + "id": "test" + }, + "test.vcf.gz.stats:md5,55ed641e16089afb33cdbc478e202d3d" + ] + ], + [ + + ], + [ + "versions.yml:md5,d94731c50c20569fe9896235a843f382" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-03-20T15:52:27.894730554" + }, + "tumor_single": { + "content": [ + [ + "test.vcf.gz" + ], + [ + "test.vcf.gz.tbi" + ], + [ + [ + { + "id": "test" + }, + "test.vcf.gz.stats:md5,55ed641e16089afb33cdbc478e202d3d" + ] + ], + [ + + ], + [ + "versions.yml:md5,d94731c50c20569fe9896235a843f382" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-03-20T15:43:28.935723443" + }, + "tumor_normal_pair": { + "content": [ + [ + "test.vcf.gz" + ], + [ + "test.vcf.gz.tbi" + ], + [ + [ + { + "id": "test", + "normal_id": "normal", + "tumor_id": "tumour" + }, + "test.vcf.gz.stats:md5,17d2091015d04cbd4a26b7a67dc659e6" + ] + ], + [ + + ], + [ + 
"versions.yml:md5,d94731c50c20569fe9896235a843f382" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-03-20T15:31:31.913366311" + }, + "tumor_normal_pair_f1r2": { + "content": [ + [ + "test.vcf.gz" + ], + [ + "test.vcf.gz.tbi" + ], + [ + [ + { + "id": "test", + "normal_id": "normal", + "tumor_id": "tumour" + }, + "test.vcf.gz.stats:md5,17d2091015d04cbd4a26b7a67dc659e6" + ] + ], + [ + "test.f1r2.tar.gz" + ], + [ + "versions.yml:md5,d94731c50c20569fe9896235a843f382" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-03-21T09:45:52.321385704" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/mutect2/tests/mito.config b/modules/nf-core/gatk4/mutect2/tests/mito.config new file mode 100644 index 0000000..de61d3e --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/tests/mito.config @@ -0,0 +1,3 @@ +process { + ext.args = { "--mitochondria-mode" } +} diff --git a/modules/nf-core/gatk4/mutect2/tests/pair.config b/modules/nf-core/gatk4/mutect2/tests/pair.config new file mode 100644 index 0000000..2a812b8 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/tests/pair.config @@ -0,0 +1,3 @@ +process { + ext.args = { "--normal-sample $meta.normal_id" } +} diff --git a/modules/nf-core/gatk4/mutect2/tests/tags.yml b/modules/nf-core/gatk4/mutect2/tests/tags.yml new file mode 100644 index 0000000..4618792 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/mutect2: + - "modules/nf-core/gatk4/mutect2/**" diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test b/modules/nf-core/samtools/faidx/tests/main.nf.test new file mode 100644 index 0000000..17244ef --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test @@ -0,0 +1,122 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FAIDX" + script "../main.nf" + process "SAMTOOLS_FAIDX" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/faidx" + + 
test("test_samtools_faidx") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_bgzip") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fasta") { + + config "./nextflow2.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fai") { + + when { + process { + """ + input[0] = [ [ id:'test', 
single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap new file mode 100644 index 0000000..3223b72 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -0,0 +1,249 @@ +{ + "test_samtools_faidx": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:14.779784761" + }, + "test_samtools_faidx_bgzip": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "3": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "versions": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ] + } + ], + "meta": { + 
"nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:20.256633877" + }, + "test_samtools_faidx_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:25.632577273" + }, + "test_samtools_faidx_stub_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:31.058424849" + }, + "test_samtools_faidx_stub_fai": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-05-28T15:42:36.479929617" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/nextflow.config b/modules/nf-core/samtools/faidx/tests/nextflow.config new file mode 100644 index 0000000..f76a3ba --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = 'MT192765.1 -o extract.fa' + } + +} diff --git a/modules/nf-core/samtools/faidx/tests/nextflow2.config b/modules/nf-core/samtools/faidx/tests/nextflow2.config new file mode 100644 index 0000000..33ebbd5 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow2.config @@ -0,0 +1,6 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = '-o extract.fa' + } +} diff --git a/modules/nf-core/samtools/faidx/tests/tags.yml b/modules/nf-core/samtools/faidx/tests/tags.yml new file mode 100644 index 0000000..e4a8394 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/faidx: + - modules/nf-core/samtools/faidx/** diff --git a/tests/pipeline/cnvkit.nf.test.snap b/tests/pipeline/cnvkit.nf.test.snap index 7228c6b..15ffc8e 100644 --- a/tests/pipeline/cnvkit.nf.test.snap +++ b/tests/pipeline/cnvkit.nf.test.snap @@ -3,16 +3,24 @@ "content": [ "{CNVKIT_BATCH={cnvkit=0.9.10}, Workflow={nf-core/createpanelrefs=v1.0dev}}" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2024-02-21T12:34:52.978702536" }, "cnvkit": { "content": [ - "panel.cnn:md5,07dea67088da689ad04012552c606882", - "test.paired_end.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", - "test.paired_end.sorted.targetcoverage.cnn:md5,ff526714696aa49bdc1dc8d00d965266", - "test2.paired_end.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", - "test2.paired_end.sorted.targetcoverage.cnn:md5,6ae6b3fce7299eedca6133d911c38fe1" + "panel.cnn:md5,68028cd2b4e0fc4489bf5bfd0a73440f", + 
"test.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,0067cc3a0e479b23ab3bf056cead31b4", + "test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test2.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,1e1012812eb893afd931485cb760294e" ], - "timestamp": "2023-07-08T16:51:32+0000" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-11T16:44:31.390826495" } } \ No newline at end of file diff --git a/tests/test_assets/gens_software_versions.yaml b/tests/test_assets/gens_software_versions.yaml index 9fec793..b9adcb7 100644 --- a/tests/test_assets/gens_software_versions.yaml +++ b/tests/test_assets/gens_software_versions.yaml @@ -1,12 +1,12 @@ "GENS_PON:SAMTOOLS_INDEX": - samtools: 1.18 + samtools: 1.20 "GENS_PON:GATK4_COLLECTREADCOUNTS": - gatk4: 4.4.0.0 + gatk4: 4.5.0.0 "GENS_PON:SAMTOOLS_FAIDX": - samtools: 1.18 + samtools: 1.20 "GENS_PON:GATK4_PREPROCESSINTERVALS": - gatk4: 4.4.0.0 + gatk4: 4.5.0.0 "GENS_PON:PICARD_CREATESEQUENCEDICTIONARY": picard: 3.1.1 "GENS_PON:GATK4_CREATEREADCOUNTPANELOFNORMALS": - gatk4: 4.4.0.0 + gatk4: 4.5.0.0 diff --git a/tests/test_assets/germlinecnvcaller_software_versions.yaml b/tests/test_assets/germlinecnvcaller_software_versions.yaml index 9d9a027..cc07404 100644 --- a/tests/test_assets/germlinecnvcaller_software_versions.yaml +++ b/tests/test_assets/germlinecnvcaller_software_versions.yaml @@ -1,20 +1,20 @@ "GERMLINECNVCALLER_COHORT:GATK4_FILTERINTERVALS": - gatk4: 4.4.0.0 + gatk4: 4.5.0.0 "GERMLINECNVCALLER_COHORT:GATK4_COLLECTREADCOUNTS": - gatk4: 4.4.0.0 + gatk4: 4.5.0.0 "GERMLINECNVCALLER_COHORT:GATK4_GERMLINECNVCALLER": - gatk4: 4.4.0.0 + gatk4: 4.5.0.0 "GERMLINECNVCALLER_COHORT:GATK4_DETERMINEGERMLINECONTIGPLOIDY": - gatk4: 4.4.0.0 + gatk4: 4.5.0.0 "GERMLINECNVCALLER_COHORT:GATK4_ANNOTATEINTERVALS": - gatk4: 4.4.0.0 + gatk4: 4.5.0.0 
"GERMLINECNVCALLER_COHORT:GATK4_INTERVALLISTTOOLS": - gatk4: 4.4.0.0 + gatk4: 4.5.0.0 "GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS": - gatk4: 4.4.0.0 + gatk4: 4.5.0.0 "GERMLINECNVCALLER_COHORT:SAMTOOLS_FAIDX": - samtools: 1.18 + samtools: 1.20 "GERMLINECNVCALLER_COHORT:SAMTOOLS_INDEX": - samtools: 1.18 + samtools: 1.20 "GERMLINECNVCALLER_COHORT:PICARD_CREATESEQUENCEDICTIONARY": picard: 3.1.1 From 0bfcd5923906d4cb70355508078792804dca0d4d Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 11 Jun 2024 17:12:31 +0200 Subject: [PATCH 122/234] exposed gens args --- CHANGELOG.md | 2 +- conf/modules/gens_pon.config | 4 ++-- nextflow.config | 8 +++++--- nextflow_schema.json | 11 +++++++++++ 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 733f7cc..5f7ba4a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#19](https://github.com/nf-core/createpanelrefs/pull/19) - Updates germlinecnvcaller subworkflow to handle exome samples - [#24](https://github.com/nf-core/createpanelrefs/pull/24) - Updates germlinecnvcaller subworkflow to use mappability and segmental duplications track - [#24](https://github.com/nf-core/createpanelrefs/pull/24) - Updates germlinecnvcaller and gens subworkflows to use custom names for panel of normals. -- [#28](https://github.com/nf-core/createpanelrefs/pull/28) - Updates default args for gens subworkflow +- [#28](https://github.com/nf-core/createpanelrefs/pull/28) - Updates default args for gens subworkflow and made the parameters available from the command line. 
### `Fixed` diff --git a/conf/modules/gens_pon.config b/conf/modules/gens_pon.config index e4ed2cc..734d754 100644 --- a/conf/modules/gens_pon.config +++ b/conf/modules/gens_pon.config @@ -52,8 +52,8 @@ process { } withName: '.*GENS_PON:GATK4_CREATEREADCOUNTPANELOFNORMALS' { - ext.args = { ["--minimum-interval-median-percentile 5.0", - "--maximum-chunk-size 167772150"].join(" ")} + ext.args = { ["--minimum-interval-median-percentile ${params.gens_min_interval_median_percentile}", + "--maximum-chunk-size ${params.maximum_chunk_size}"].join(" ")} publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/gens_pon/createreadcountpanelofnormals" }, diff --git a/nextflow.config b/nextflow.config index 67fee20..48eed54 100644 --- a/nextflow.config +++ b/nextflow.config @@ -32,9 +32,11 @@ params { gcnv_scatter_content = 5000 // Gens options - gens_bin_length = 100 - gens_pon_name = 'gens' - gens_readcount_format = 'HDF5' + gens_bin_length = 100 + gens_maximum_chunk_size = 167772150 + gens_min_interval_median_percentile = 5.0 + gens_pon_name = 'gens' + gens_readcount_format = 'HDF5' // CNVkit options cnvkit_targets = null diff --git a/nextflow_schema.json b/nextflow_schema.json index e05b538..36974d7 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -127,6 +127,17 @@ "fa_icon": "fas fa-sort-numeric-down", "help_text": "Used by GATK's PreprocessIntervals. We recommend a bin length of 100." }, + "gens_maximum_chunk_size": { + "type": "number", + "default": 167772150, + "description": "Maximum chunk size when writing the HDF5 file" + }, + "gens_min_interval_median_percentile": { + "type": "number", + "default": 5, + "description": "Minimum interval median percentile for gatk CreateReadCountPanelOfNormals", + "help_text": "Genomic intervals with a median (across samples) of fractional coverage (optionally corrected for GC bias) less than or equal to this percentile are filtered out. 
(This is the first filter applied.)" + }, "gens_pon_name": { "type": "string", "description": "Name for panel of normals.", From d05b1fe78f21296ec2d8b87a49e5277ebfffa898 Mon Sep 17 00:00:00 2001 From: jemten Date: Tue, 11 Jun 2024 17:26:19 +0200 Subject: [PATCH 123/234] fix typo --- conf/modules/gens_pon.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules/gens_pon.config b/conf/modules/gens_pon.config index 734d754..8ebd6cb 100644 --- a/conf/modules/gens_pon.config +++ b/conf/modules/gens_pon.config @@ -53,7 +53,7 @@ process { withName: '.*GENS_PON:GATK4_CREATEREADCOUNTPANELOFNORMALS' { ext.args = { ["--minimum-interval-median-percentile ${params.gens_min_interval_median_percentile}", - "--maximum-chunk-size ${params.maximum_chunk_size}"].join(" ")} + "--maximum-chunk-size ${params.gens_maximum_chunk_size}"].join(" ")} publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/gens_pon/createreadcountpanelofnormals" }, From 8711079ea9f19051f0090d6a9de042062cb7d14a Mon Sep 17 00:00:00 2001 From: jemten Date: Wed, 12 Jun 2024 16:53:57 +0200 Subject: [PATCH 124/234] publish gens intervals by default --- CHANGELOG.md | 1 + conf/modules/gens_pon.config | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f7ba4a..bc09d2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#24](https://github.com/nf-core/createpanelrefs/pull/24) - Updates germlinecnvcaller subworkflow to use mappability and segmental duplications track - [#24](https://github.com/nf-core/createpanelrefs/pull/24) - Updates germlinecnvcaller and gens subworkflows to use custom names for panel of normals. - [#28](https://github.com/nf-core/createpanelrefs/pull/28) - Updates default args for gens subworkflow and made the parameters available from the command line. 
+- [#31](https://github.com/nf-core/createpanelrefs/pull/31) - Publish interval_list file from gens subworkflow by default. ### `Fixed` diff --git a/conf/modules/gens_pon.config b/conf/modules/gens_pon.config index 8ebd6cb..f64835e 100644 --- a/conf/modules/gens_pon.config +++ b/conf/modules/gens_pon.config @@ -40,6 +40,11 @@ process { ext.args = { ["--imr OVERLAPPING_ONLY", "--bin-length ${params.gens_bin_length}"].join(" ") } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/gens_pon/intervals" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } withName: '.*GENS_PON:GATK4_COLLECTREADCOUNTS' { From c17e66345fe2a4fc6a540952a9e705a1757cb157 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Oct 2024 10:17:29 +0200 Subject: [PATCH 125/234] update modules and subworkflows --- modules.json | 8 +- modules/nf-core/gatk4/mutect2/environment.yml | 4 - modules/nf-core/gatk4/mutect2/meta.yml | 175 ++++++++++-------- .../nf-core/gatk4/mutect2/tests/main.nf.test | 124 ++++++------- modules/nf-core/multiqc/environment.yml | 4 - modules/nf-core/multiqc/main.nf | 5 - .../nf-core/multiqc/tests/main.nf.test.snap | 33 ---- .../nf-core/utils_nextflow_pipeline/main.nf | 16 +- .../nf-core/utils_nfcore_pipeline/main.nf | 10 +- 9 files changed, 176 insertions(+), 203 deletions(-) diff --git a/modules.json b/modules.json index e6d0dea..84f6bb5 100644 --- a/modules.json +++ b/modules.json @@ -67,7 +67,7 @@ }, "gatk4/mutect2": { "branch": "master", - "git_sha": "5fd04feb37b58caa6a54d41e38c80066bdf71056", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["bam_create_som_pon_gatk"] }, "gatk4/preprocessintervals": { @@ -77,7 +77,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "8f2062e7b4185590fb9f43c275381a31a6544fc0", + "git_sha": "b8d36829fa84b6e404364abff787e8b07f6d058c", "installed_by": ["modules"] }, "picard/createsequencedictionary": { @@ -106,12 +106,12 @@ }, "utils_nextflow_pipeline": { 
"branch": "master", - "git_sha": "9d05360da397692321d377b6102d2fb22507c6ef", + "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "772684d9d66f37b650c8ba5146ac1ee3ecba2acb", + "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { diff --git a/modules/nf-core/gatk4/mutect2/environment.yml b/modules/nf-core/gatk4/mutect2/environment.yml index 04fcfdb..55993f4 100644 --- a/modules/nf-core/gatk4/mutect2/environment.yml +++ b/modules/nf-core/gatk4/mutect2/environment.yml @@ -1,7 +1,3 @@ -<<<<<<< HEAD:modules/nf-core/fastqc/environment.yml -======= -name: gatk4_mutect2 ->>>>>>> dev:modules/nf-core/gatk4/mutect2/environment.yml channels: - conda-forge - bioconda diff --git a/modules/nf-core/gatk4/mutect2/meta.yml b/modules/nf-core/gatk4/mutect2/meta.yml index 21c928e..27fd63a 100644 --- a/modules/nf-core/gatk4/mutect2/meta.yml +++ b/modules/nf-core/gatk4/mutect2/meta.yml @@ -17,88 +17,113 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test'] - - input: - type: list - description: list of BAM files, also able to take CRAM as an input - pattern: "*.{bam/cram}" - - input_index: - type: list - description: list of BAM file indexes, also able to take CRAM indexes as an input - pattern: "*.{bam.bai/cram.crai}" - - intervals: - type: file - description: Specify region the tools is run on. - pattern: ".{bed,interval_list}" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. 
[ id:'genome' ] - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - meta3: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'genome' ] - - fai: - type: file - description: Index of reference fasta file - pattern: "*.fasta.fai" - - meta4: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'genome' ] - - dict: - type: file - description: GATK sequence dictionary - pattern: "*.dict" - - germline_resource: - type: file - description: Population vcf of germline sequencing, containing allele fractions. - pattern: "*.vcf.gz" - - germline_resource_tbi: - type: file - description: Index file for the germline resource. - pattern: "*.vcf.gz.tbi" - - panel_of_normals: - type: file - description: vcf file to be used as a panel of normals. - pattern: "*.vcf.gz" - - panel_of_normals_tbi: - type: file - description: Index for the panel of normals. - pattern: "*.vcf.gz.tbi" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - input: + type: list + description: list of BAM files, also able to take CRAM as an input + pattern: "*.{bam/cram}" + - input_index: + type: list + description: list of BAM file indexes, also able to take CRAM indexes as an + input + pattern: "*.{bam.bai/cram.crai}" + - intervals: + type: file + description: Specify region the tools is run on. + pattern: ".{bed,interval_list}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - - germline_resource: + type: file + description: Population vcf of germline sequencing, containing allele fractions. + pattern: "*.vcf.gz" + - - germline_resource_tbi: + type: file + description: Index file for the germline resource. + pattern: "*.vcf.gz.tbi" + - - panel_of_normals: + type: file + description: vcf file to be used as a panel of normals. + pattern: "*.vcf.gz" + - - panel_of_normals_tbi: + type: file + description: Index for the panel of normals. + pattern: "*.vcf.gz.tbi" output: - vcf: - type: file - description: compressed vcf file - pattern: "*.vcf.gz" + - meta: + type: file + description: compressed vcf file + pattern: "*.vcf.gz" + - "*.vcf.gz": + type: file + description: compressed vcf file + pattern: "*.vcf.gz" - tbi: - type: file - description: Index of vcf file - pattern: "*vcf.gz.tbi" + - meta: + type: file + description: Index of vcf file + pattern: "*vcf.gz.tbi" + - "*.tbi": + type: file + description: Index of vcf file + pattern: "*vcf.gz.tbi" - stats: - type: file - description: Stats file that pairs with output vcf file - pattern: "*vcf.gz.stats" + - meta: + type: file + description: Stats file that pairs with output vcf file + pattern: "*vcf.gz.stats" + - "*.stats": + type: file + description: Stats file that pairs with output vcf file + pattern: "*vcf.gz.stats" - f1r2: - type: file - description: file containing information to be passed to LearnReadOrientationModel (only outputted when tumor_normal_pair mode is run) - pattern: "*.f1r2.tar.gz" + - meta: + type: file + description: file containing information to be passed to LearnReadOrientationModel + (only outputted when tumor_normal_pair mode is run) + pattern: "*.f1r2.tar.gz" + - "*.f1r2.tar.gz": + type: file + description: file containing information to be passed to LearnReadOrientationModel + (only outputted when tumor_normal_pair mode is run) + pattern: "*.f1r2.tar.gz" - 
versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@GCJMackenzie" - "@ramprasadn" diff --git a/modules/nf-core/gatk4/mutect2/tests/main.nf.test b/modules/nf-core/gatk4/mutect2/tests/main.nf.test index d247ee3..aea8d22 100644 --- a/modules/nf-core/gatk4/mutect2/tests/main.nf.test +++ b/modules/nf-core/gatk4/mutect2/tests/main.nf.test @@ -21,31 +21,31 @@ nextflow_process { tumor_id:'tumour' ], [ - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true) ], [ - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true) ], [] ] input[1] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) ] input[2] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], 
checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true) ] input[3] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.dict', checkIfExists: true) ] - input[4] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz'], checkIfExists: true) - input[5] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz_tbi'], checkIfExists: true) - input[6] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true) - input[7] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true) + input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz', checkIfExists: true) + input[5] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi', checkIfExists: true) + input[6] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz', checkIfExists: true) + input[7] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz.tbi', checkIfExists: true) """ } } @@ -78,31 +78,31 @@ nextflow_process { tumor_id:'tumour' ], [ - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true) ], [ - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true) ], [] ] input[1] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) ] input[2] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true) ] input[3] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.dict', checkIfExists: true) ] - input[4] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz'], checkIfExists: true) - input[5] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz_tbi'], checkIfExists: true) - input[6] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true) - input[7] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true) + input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz', 
checkIfExists: true) + input[5] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi', checkIfExists: true) + input[6] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz', checkIfExists: true) + input[7] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz.tbi', checkIfExists: true) """ } } @@ -127,26 +127,26 @@ nextflow_process { """ input[0] = [ [ id:'test'], - [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true)], - [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true)], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true)], [] ] input[1] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) ] input[2] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true) ] input[3] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.dict', checkIfExists: true) ] - input[4] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz'], checkIfExists: 
true) - input[5] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz_tbi'], checkIfExists: true) - input[6] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true) - input[7] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true) + input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz', checkIfExists: true) + input[5] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi', checkIfExists: true) + input[6] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz', checkIfExists: true) + input[7] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz.tbi', checkIfExists: true) """ } } @@ -171,26 +171,26 @@ nextflow_process { """ input[0] = [ [ id:'test'], - [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true)], - [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram', checkIfExists: true)], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true)], [] ] input[1] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) ] input[2] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], 
checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true) ] input[3] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.dict', checkIfExists: true) ] - input[4] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz'], checkIfExists: true) - input[5] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz_tbi'], checkIfExists: true) - input[6] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true) - input[7] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true) + input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz', checkIfExists: true) + input[5] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi', checkIfExists: true) + input[6] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz', checkIfExists: true) + input[7] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz.tbi', checkIfExists: true) """ } } @@ -216,21 +216,21 @@ nextflow_process { """ input[0] = [ [ id:'test'], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true)], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true)], + [ 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true)], [] ] input[1] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) ] input[2] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true) ] input[3] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.dict', checkIfExists: true) ] input[4] = [] input[5] = [] @@ -261,21 +261,21 @@ nextflow_process { """ input[0] = [ [ id:'test'], - [ file(params.test_data['homo_sapiens']['illumina']['mitochon_standin_recalibrated_sorted_bam'], checkIfExists: true)], - [ file(params.test_data['homo_sapiens']['illumina']['mitochon_standin_recalibrated_sorted_bam_bai'], checkIfExists: true)], - [ file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)] + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam', checkIfExists: true)], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam.bai', checkIfExists: true)], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)] ] input[1] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] input[2] = [ [ 
id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] input[3] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) ] input[4] = [] input[5] = [] @@ -312,31 +312,31 @@ nextflow_process { tumor_id:'tumour' ], [ - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true) ], [ - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true) ], [] ] input[1] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) ] input[2] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true) ] input[3] = [ [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.dict', checkIfExists: true) ] - input[4] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz'], checkIfExists: true) - input[5] = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz_tbi'], checkIfExists: true) - input[6] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true) - input[7] = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true) + input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz', checkIfExists: true) + input[5] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/gnomAD.r2.1.1.vcf.gz.tbi', checkIfExists: true) + input[6] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz', checkIfExists: true) + input[7] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/germlineresources/mills_and_1000G.indels.hg38.vcf.gz.tbi', checkIfExists: true) """ } } diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index 1135323..6f5b867 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -2,8 +2,4 @@ channels: - conda-forge - bioconda dependencies: -<<<<<<< HEAD - bioconda::multiqc=1.25.1 -======= - - bioconda::multiqc=1.22.2 ->>>>>>> dev diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 6b3b099..9724d2f 100644 --- 
a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,13 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? -<<<<<<< HEAD 'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' : 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }" -======= - 'https://depot.galaxyproject.org/singularity/multiqc:1.22.2--pyhdfd78af_0' : - 'biocontainers/multiqc:1.22.2--pyhdfd78af_0' }" ->>>>>>> dev input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index 212111c..2fcbb5f 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,7 +2,6 @@ "multiqc_versions_single": { "content": [ [ -<<<<<<< HEAD "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], @@ -11,16 +10,6 @@ "nextflow": "24.04.4" }, "timestamp": "2024-10-02T17:51:46.317523" -======= - "versions.yml:md5,ddbc971a8307f9b9b7b973714cde29d0" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-10T11:50:10.874341679" ->>>>>>> dev }, "multiqc_stub": { "content": [ @@ -28,7 +17,6 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", -<<<<<<< HEAD "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], @@ -37,21 +25,10 @@ "nextflow": "24.04.4" }, "timestamp": "2024-10-02T17:52:20.680978" -======= - "versions.yml:md5,ddbc971a8307f9b9b7b973714cde29d0" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-10T11:50:49.271943761" ->>>>>>> dev }, "multiqc_versions_config": { "content": [ [ -<<<<<<< HEAD "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], @@ -60,15 +37,5 @@ "nextflow": "24.04.4" }, "timestamp": "2024-10-02T17:52:09.185842" -======= - "versions.yml:md5,ddbc971a8307f9b9b7b973714cde29d0" - ] - ], - "meta": { - 
"nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-10T11:50:34.046706025" ->>>>>>> dev } } \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf index 2b0dc67..b17b877 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -3,9 +3,9 @@ // /* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SUBWORKFLOW DEFINITION -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ workflow UTILS_NEXTFLOW_PIPELINE { @@ -44,9 +44,9 @@ workflow UTILS_NEXTFLOW_PIPELINE { } /* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FUNCTIONS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // @@ -106,13 +106,7 @@ def checkCondaChannels() { def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean // Check that they are in the right order - def channel_priority_violation = false - - required_channels_in_order.eachWithIndex { channel, index -> - if (index < required_channels_in_order.size() - 1) { - channel_priority_violation |= !(channels.indexOf(channel) < channels.indexOf(required_channels_in_order[index + 1])) - } - } + def channel_priority_violation = required_channels_in_order != channels.findAll { ch -> ch in required_channels_in_order } if (channels_missing | channel_priority_violation) { log.warn( diff --git 
a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index b78273c..5cb7baf 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -3,9 +3,9 @@ // /* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SUBWORKFLOW DEFINITION -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ workflow UTILS_NFCORE_PIPELINE { @@ -21,9 +21,9 @@ workflow UTILS_NFCORE_PIPELINE { } /* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FUNCTIONS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // @@ -62,7 +62,7 @@ def checkProfileProvided(nextflow_cli_args) { def workflowCitation() { def temp_doi_ref = "" def manifest_doi = workflow.manifest.doi.tokenize(",") - // Using a loop to handle multiple DOIs + // Handling multiple DOIs // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers // Removing ` ` since the manifest.doi is a string and not a proper list manifest_doi.each { doi_ref -> From f0a1ae853b305eeffa5c71c5f1a03f17ab843645 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Oct 2024 10:19:45 +0200 Subject: [PATCH 126/234] prettier --- nextflow.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index 4018b9b..20312ac 100644 --- a/nextflow.config +++ b/nextflow.config @@ -277,10 +277,10 @@ validation { """ afterText = """${manifest.doi ? 
"* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} * The nf-core framework - https://doi.org/10.1038/s41587-020-0439-x + https://doi.org/10.1038/s41587-020-0439-x * Software dependencies - https://github.com/${manifest.name}/blob/master/CITATIONS.md + https://github.com/${manifest.name}/blob/master/CITATIONS.md """ } summary { From d9c12b7c7eefdaa2a41a5d9920d7e896a6b1a8a5 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Oct 2024 10:23:34 +0200 Subject: [PATCH 127/234] Include default --- .github/workflows/ci.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b6464de..16bd074 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,6 +55,8 @@ jobs: profile: "conda" - isMaster: false profile: "singularity" + include: + - tag: default steps: - name: Check out pipeline code uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 @@ -64,6 +66,11 @@ jobs: with: version: "${{ matrix.NXF_VER }}" + - name: Install nf-test + uses: nf-core/setup-nf-test@v1 + with: + version: ${{ env.NFT_VER }} + - name: Set up apptainer if: matrix.profile == 'singularity' uses: eWaterCycle/setup-apptainer@main From 4464c1631f93b9865da398f23c0cf97b1f8e5f28 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Oct 2024 10:35:32 +0200 Subject: [PATCH 128/234] shard --- .github/workflows/ci.yml | 85 +++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 31 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 16bd074..bbff218 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,10 @@ on: workflow_dispatch: env: + NFT_DIFF: "pdiff" + NFT_DIFF_ARGS: "--line-numbers --width 120 --expand-tabs=2" + NFT_VER: "0.9.0" + NFT_WORKDIR: "~" NXF_ANSI_LOG: false NXF_SINGULARITY_CACHEDIR: ${{ github.workspace 
}}/.singularity NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity @@ -19,54 +23,44 @@ concurrency: cancel-in-progress: true jobs: - changes: - name: Check for changes - runs-on: ubuntu-latest - outputs: - # Expose matched filters as job 'tags' output variable - tags: ${{ steps.filter.outputs.changes }} - steps: - - uses: actions/checkout@v3 - - uses: dorny/paths-filter@v2 - id: filter - with: - filters: "tests/config/tags.yml" test: - name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.tags }} | ${{ matrix.profile }})" - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/createpanelrefs') }}" runs-on: ubuntu-latest + name: "Test ${{ matrix.filter }} | ${{ matrix.profile }} | ${{ matrix.NXF_VER }} | ${{ matrix.shard }}/5" strategy: + fail-fast: false matrix: NXF_VER: - "24.04.2" - "latest-everything" - profile: - - "conda" - - "docker" - - "singularity" - # Run tests based on changes in code - tags: ["${{ fromJson(needs.changes.outputs.tags) }}"] + filter: ["workflow", "function", "pipeline"] + # filter: ["process", "workflow", "function", "pipeline"] + profile: ["conda", "docker", "singularity"] + shard: [1, 2, 3, 4, 5] isMaster: - ${{ github.base_ref == 'master' }} - # Exclude conda and singularity on dev exclude: - isMaster: false profile: "conda" - isMaster: false profile: "singularity" - include: - - tag: default + steps: - name: Check out pipeline code uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v4 + with: + python-version: "3.11" + architecture: "x64" - name: Set up Nextflow uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" - - name: Install nf-test + - name: Set up nf-test uses: nf-core/setup-nf-test@v1 with: version: ${{ env.NFT_VER }} @@ -81,6 +75,18 @@ jobs: mkdir -p $NXF_SINGULARITY_CACHEDIR mkdir -p 
$NXF_SINGULARITY_LIBRARYDIR + - name: Cache pdiff + uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4 + id: cache-pip-pdiff + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-pdiff + + - name: Set up pdiff to see diff between nf-test snapshots + run: | + python -m pip install --upgrade pip + pip install pdiff cryptography + - name: Set up Miniconda if: matrix.profile == 'conda' uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 @@ -93,15 +99,32 @@ jobs: - name: Set up Conda if: matrix.profile == 'conda' run: | - conda clean -a - conda install -n base conda-libmamba-solver - conda config --set solver libmamba echo $(realpath $CONDA)/condabin >> $GITHUB_PATH echo $(realpath python) >> $GITHUB_PATH - name: Disk space cleanup - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + uses: jlumbroso/free-disk-space@v1.3.1 + + - name: "Run tests | ${{ matrix.filter }}_${{ matrix.profile }} | ${{ matrix.shard }}/5" + run: | + nf-test test \ + --ci \ + --debug \ + --verbose \ + --junitxml="TEST-${{ matrix.filter }}_${{ matrix.profile }}_${{ matrix.shard }}.xml" \ + --shard ${{ matrix.shard }}/5 \ + --changed-since HEAD^ \ + --follow-dependencies \ + --profile "+${{ matrix.profile }}" \ + --filter ${{ matrix.filter }} + + - name: Publish Test Report + uses: mikepenz/action-junit-report@v4 + if: success() || failure() # always run even if the previous step fails + with: + report_paths: "TEST-*.xml" - - name: "Run nf-test ${{ matrix.tag }}" + - name: Clean up + if: always() run: | - nf-test test tests/pipeline/${{ matrix.tag }}.nf.test --profile "+${{ matrix.profile }}" --tap=test.tap + sudo rm -rf /home/ubuntu/tests/ From ed913a776b8243487f157c8ac8a0d02e038af6f9 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Oct 2024 11:19:21 +0200 Subject: [PATCH 129/234] refactor tests --- nf-test.config | 15 +- .../local/{gens_pon.nf => gens_pon/main.nf} | 12 +- .../main.nf} | 28 +- .../main.nf | 15 
- tests/.nftignore | 8 + tests/config/tags.yml | 53 --- tests/default.nf.test | 36 ++ tests/default.nf.test.snap | 47 +++ tests/{pipeline => }/gens_pon.config | 0 tests/gens_pon.nf.test | 42 ++ .../germlinecnvcaller_cohort.config | 0 tests/germlinecnvcaller_cohort.nf.test | 42 ++ tests/lib/UTILS.groovy | 11 - tests/mutect2.nf.test | 37 ++ tests/mutect2.nf.test.snap | 383 ++++++++++++++++++ tests/pipeline/cnvkit.nf.test | 26 -- tests/pipeline/cnvkit.nf.test.snap | 26 -- tests/pipeline/default.nf.test | 25 -- tests/pipeline/default.nf.test.snap | 26 -- tests/pipeline/gens_pon.nf.test | 56 --- .../pipeline/germlinecnvcaller_cohort.nf.test | 64 --- tests/pipeline/mutect2.nf.test | 26 -- tests/pipeline/mutect2.nf.test.snap | 12 - tests/test_assets/gens_software_versions.yaml | 12 - .../germlinecnvcaller_software_versions.yaml | 20 - workflows/createpanelrefs.nf | 6 +- 26 files changed, 627 insertions(+), 401 deletions(-) rename subworkflows/local/{gens_pon.nf => gens_pon/main.nf} (81%) rename subworkflows/local/{germlinecnvcaller_cohort.nf => germlinecnvcaller_cohort/main.nf} (90%) create mode 100644 tests/.nftignore delete mode 100644 tests/config/tags.yml create mode 100644 tests/default.nf.test create mode 100644 tests/default.nf.test.snap rename tests/{pipeline => }/gens_pon.config (100%) create mode 100644 tests/gens_pon.nf.test rename tests/{pipeline => }/germlinecnvcaller_cohort.config (100%) create mode 100644 tests/germlinecnvcaller_cohort.nf.test delete mode 100644 tests/lib/UTILS.groovy create mode 100644 tests/mutect2.nf.test create mode 100644 tests/mutect2.nf.test.snap delete mode 100644 tests/pipeline/cnvkit.nf.test delete mode 100644 tests/pipeline/cnvkit.nf.test.snap delete mode 100644 tests/pipeline/default.nf.test delete mode 100644 tests/pipeline/default.nf.test.snap delete mode 100644 tests/pipeline/gens_pon.nf.test delete mode 100644 tests/pipeline/germlinecnvcaller_cohort.nf.test delete mode 100644 tests/pipeline/mutect2.nf.test delete mode 100644 
tests/pipeline/mutect2.nf.test.snap delete mode 100644 tests/test_assets/gens_software_versions.yaml delete mode 100644 tests/test_assets/germlinecnvcaller_software_versions.yaml diff --git a/nf-test.config b/nf-test.config index c35074c..69ef731 100644 --- a/nf-test.config +++ b/nf-test.config @@ -1,16 +1,19 @@ config { // location for all nf-tests - testsDir "tests/pipeline" + testsDir "." // nf-test directory including temporary files for each test workDir ".nf-test" - // location of library folder that is added automatically to the classpath - libDir "tests/lib/" - // location of an optional nextflow.config file specific for executing tests - configFile "nextflow.config" + configFile "conf/test.config" // run all test with defined profile(s) from the main nextflow.config - profile "" + profile "test" + + // Include plugins + plugins { + load "nft-bam@0.4.0" + load "nft-utils@0.0.3" + } } diff --git a/subworkflows/local/gens_pon.nf b/subworkflows/local/gens_pon/main.nf similarity index 81% rename from subworkflows/local/gens_pon.nf rename to subworkflows/local/gens_pon/main.nf index 36cea2a..f9782a4 100644 --- a/subworkflows/local/gens_pon.nf +++ b/subworkflows/local/gens_pon/main.nf @@ -1,9 +1,9 @@ -include { GATK4_COLLECTREADCOUNTS } from '../../modules/nf-core/gatk4/collectreadcounts/main' -include { GATK4_CREATEREADCOUNTPANELOFNORMALS } from '../../modules/nf-core/gatk4/createreadcountpanelofnormals/main' -include { GATK4_PREPROCESSINTERVALS } from '../../modules/nf-core/gatk4/preprocessintervals/main' -include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main' -include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { GATK4_COLLECTREADCOUNTS } from '../../../modules/nf-core/gatk4/collectreadcounts/main' +include { GATK4_CREATEREADCOUNTPANELOFNORMALS } from 
'../../../modules/nf-core/gatk4/createreadcountpanelofnormals/main' +include { GATK4_PREPROCESSINTERVALS } from '../../../modules/nf-core/gatk4/preprocessintervals/main' +include { PICARD_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/picard/createsequencedictionary/main' +include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' workflow GENS_PON { take: diff --git a/subworkflows/local/germlinecnvcaller_cohort.nf b/subworkflows/local/germlinecnvcaller_cohort/main.nf similarity index 90% rename from subworkflows/local/germlinecnvcaller_cohort.nf rename to subworkflows/local/germlinecnvcaller_cohort/main.nf index 21d82ab..ce0fa00 100644 --- a/subworkflows/local/germlinecnvcaller_cohort.nf +++ b/subworkflows/local/germlinecnvcaller_cohort/main.nf @@ -1,17 +1,17 @@ -include { GATK4_ANNOTATEINTERVALS } from '../../modules/nf-core/gatk4/annotateintervals/main' -include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_TARGETS } from '../../modules/nf-core/gatk4/bedtointervallist/main' -include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_EXCLUDE } from '../../modules/nf-core/gatk4/bedtointervallist/main' -include { GATK4_COLLECTREADCOUNTS } from '../../modules/nf-core/gatk4/collectreadcounts/main' -include { GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../modules/nf-core/gatk4/determinegermlinecontigploidy/main' -include { GATK4_FILTERINTERVALS } from '../../modules/nf-core/gatk4/filterintervals/main' -include { GATK4_GERMLINECNVCALLER } from '../../modules/nf-core/gatk4/germlinecnvcaller/main' -include { GATK4_INDEXFEATUREFILE as GATK4_INDEXFEATUREFILE_MAPPABILITY } from '../../modules/nf-core/gatk4/indexfeaturefile/main' -include { GATK4_INDEXFEATUREFILE as GATK4_INDEXFEATUREFILE_SEGDUP } from '../../modules/nf-core/gatk4/indexfeaturefile/main' -include { GATK4_INTERVALLISTTOOLS } from '../../modules/nf-core/gatk4/intervallisttools/main' 
-include { GATK4_PREPROCESSINTERVALS } from '../../modules/nf-core/gatk4/preprocessintervals/main' -include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main' -include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { GATK4_ANNOTATEINTERVALS } from '../../../modules/nf-core/gatk4/annotateintervals/main' +include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_TARGETS } from '../../../modules/nf-core/gatk4/bedtointervallist/main' +include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_EXCLUDE } from '../../../modules/nf-core/gatk4/bedtointervallist/main' +include { GATK4_COLLECTREADCOUNTS } from '../../../modules/nf-core/gatk4/collectreadcounts/main' +include { GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../../modules/nf-core/gatk4/determinegermlinecontigploidy/main' +include { GATK4_FILTERINTERVALS } from '../../../modules/nf-core/gatk4/filterintervals/main' +include { GATK4_GERMLINECNVCALLER } from '../../../modules/nf-core/gatk4/germlinecnvcaller/main' +include { GATK4_INDEXFEATUREFILE as GATK4_INDEXFEATUREFILE_MAPPABILITY } from '../../../modules/nf-core/gatk4/indexfeaturefile/main' +include { GATK4_INDEXFEATUREFILE as GATK4_INDEXFEATUREFILE_SEGDUP } from '../../../modules/nf-core/gatk4/indexfeaturefile/main' +include { GATK4_INTERVALLISTTOOLS } from '../../../modules/nf-core/gatk4/intervallisttools/main' +include { GATK4_PREPROCESSINTERVALS } from '../../../modules/nf-core/gatk4/preprocessintervals/main' +include { PICARD_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/picard/createsequencedictionary/main' +include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' workflow GERMLINECNVCALLER_COHORT { take: diff --git a/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf 
b/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf index be77fb7..3a3c2d4 100644 --- a/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf @@ -75,7 +75,6 @@ workflow PIPELINE_INITIALISATION { ch_samplesheet = Channel .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) - .map { samplesheet -> validateInputSamplesheet(samplesheet)} emit: samplesheet = ch_samplesheet @@ -142,20 +141,6 @@ def validateInputParameters() { genomeExistsError() } -// -// Validate channels from input samplesheet -// -def validateInputSamplesheet(input) { - def (metas, fastqs) = input[1..2] - - // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end - def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1 - if (!endedness_ok) { - error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") - } - - return [ metas[0], fastqs ] -} // // Get attribute from genome config file e.g. 
fasta // diff --git a/tests/.nftignore b/tests/.nftignore new file mode 100644 index 0000000..ec58fdc --- /dev/null +++ b/tests/.nftignore @@ -0,0 +1,8 @@ +multiqc/multiqc_data/multiqc.log +multiqc/multiqc_data/multiqc_data.json +multiqc/multiqc_data/multiqc_general_stats.txt +multiqc/multiqc_data/multiqc_software_versions.txt +multiqc/multiqc_data/multiqc_sources.txt +multiqc/multiqc_plots/{svg,pdf,png}/*.{svg,pdf,png} +multiqc/multiqc_report.html +pipeline_info/*.{html,json,txt,yml} diff --git a/tests/config/tags.yml b/tests/config/tags.yml deleted file mode 100644 index 0bb478d..0000000 --- a/tests/config/tags.yml +++ /dev/null @@ -1,53 +0,0 @@ -default: - - conf/** - - main.nf - - modules/** - - nextflow.config - - nextflow_schema.json - - subworkflows/** - - tests/*.nf.test - - workflows/** - -cnvkit: - - conf/modules/cnvkit.config - - modules/nf-core/cnvkit/batch/** - - tests/cnvkit.nf.test - -mutect2: - - conf/modules/mutect2.config - - modules/nf-core/gatk4/mutect2/** - - modules/nf-core/gatk4/genomicsdbimport/** - - modules/nf-core/gatk4/createsomaticpanelofnormals/** - - subworkflows/nf-core/bam_create_som_pon_gatk/** - -germlinecnvcaller_cohort: - - conf/modules/germlinecnvcaller_cohort.config - - modules/nf-core/gatk4/annotateintervals/** - - modules/nf-core/gatk4/bedtointervallist/** - - modules/nf-core/gatk4/bedtointervallist/** - - modules/nf-core/gatk4/collectreadcounts/** - - modules/nf-core/gatk4/determinegermlinecontigploidy/** - - modules/nf-core/gatk4/filterintervals/** - - modules/nf-core/gatk4/germlinecnvcaller/** - - modules/nf-core/gatk4/indexfeaturefile/** - - modules/nf-core/gatk4/intervallisttools/** - - modules/nf-core/gatk4/preprocessintervals/** - - modules/nf-core/picard/createsequencedictionary/** - - modules/nf-core/samtools/faidx/** - - modules/nf-core/samtools/index/** - - subworkflows/local/germlinecnvcaller_cohort.nf - - tests/pipeline/germlinecnvcaller_cohort.nf.test - - tests/pipeline/germlinecnvcaller_cohort.nf.config - - 
tests/test_assets/germlinecnvcaller_software_versions.yaml - -gens_pon: - - conf/modules/gens_pon.config - - modules/nf-core/gatk4/collectreadcounts/** - - modules/nf-core/gatk4/createreadcountpanelofnormals/** - - modules/nf-core/gatk4/preprocessintervals/** - - modules/nf-core/picard/createsequencedictionary/** - - modules/nf-core/samtools/faidx/** - - modules/nf-core/samtools/index/** - - subworkflows/local/gens_pon.nf - - tests/pipeline/gens_pon.nf.test - - tests/test_assets/gens_software_versions.yaml diff --git a/tests/default.nf.test b/tests/default.nf.test new file mode 100644 index 0000000..261cb87 --- /dev/null +++ b/tests/default.nf.test @@ -0,0 +1,36 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + tag "pipeline" + tag "pipeline_createpanelrefs" + + test("Run with profile test | default aka cnvkit") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap new file mode 100644 index 0000000..1b6302e --- /dev/null +++ b/tests/default.nf.test.snap @@ -0,0 +1,47 @@ +{ + "Run with profile 
test": { + "content": [ + 2, + { + "CNVKIT_BATCH": { + "cnvkit": "0.9.10" + }, + "Workflow": { + "nf-core/createpanelrefs": "v1.0dev" + } + }, + [ + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml", + "reference", + "reference/cnvkit", + "reference/cnvkit/panel.cnn", + "reference/cnvkit/test.paired_end.recalibrated.sorted.antitargetcoverage.cnn", + "reference/cnvkit/test.paired_end.recalibrated.sorted.targetcoverage.cnn", + "reference/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn", + "reference/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn" + ], + [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "panel.cnn:md5,68028cd2b4e0fc4489bf5bfd0a73440f", + "test.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,0067cc3a0e479b23ab3bf056cead31b4", + "test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test2.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,1e1012812eb893afd931485cb760294e" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-11T11:10:55.4912" + } +} \ No newline at end of file diff --git a/tests/pipeline/gens_pon.config b/tests/gens_pon.config similarity index 100% rename from tests/pipeline/gens_pon.config rename to tests/gens_pon.config diff --git a/tests/gens_pon.nf.test b/tests/gens_pon.nf.test new file mode 100644 index 0000000..2a2ce10 --- /dev/null +++ b/tests/gens_pon.nf.test @@ -0,0 +1,42 @@ +nextflow_pipeline { + + name "Test pipeline | gens" + script "../main.nf" 
+ tag "pipeline" + tag "pipeline_createpanelrefs" + config "./gens_pon.config" + + test("Run gens test") { + + when { + params { + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" + gens_bin_length = 100 + gens_pon_name = 'gens_pon' + gens_readcount_format = "TSV" + outdir = "$outputDir" + tools = 'gens' + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/pipeline/germlinecnvcaller_cohort.config b/tests/germlinecnvcaller_cohort.config similarity index 100% rename from tests/pipeline/germlinecnvcaller_cohort.config rename to tests/germlinecnvcaller_cohort.config diff --git a/tests/germlinecnvcaller_cohort.nf.test b/tests/germlinecnvcaller_cohort.nf.test new file mode 100644 index 0000000..612c9d1 --- /dev/null +++ b/tests/germlinecnvcaller_cohort.nf.test @@ -0,0 +1,42 @@ +nextflow_pipeline { + + name "Test pipeline | germlinecnvcaller" + script "../main.nf" + tag "pipeline" + tag "pipeline_createpanelrefs" + config "./germlinecnvcaller_cohort.config" + + test("Run germlinecnvcaller test") { + + when { + params { + fasta = 
"https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" + gcnv_model_name = 'cohort' + gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" + gcnv_scatter_content = 2 + outdir = "$outputDir" + tools = 'germlinecnvcaller' + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/lib/UTILS.groovy b/tests/lib/UTILS.groovy deleted file mode 100644 index deacb58..0000000 --- a/tests/lib/UTILS.groovy +++ /dev/null @@ -1,11 +0,0 @@ -// Function to remove Nextflow version from software_versions.yml - -class UTILS { - public static String removeNextflowVersion(outputDir) { - def softwareVersions = path("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml").yaml - if (softwareVersions.containsKey("Workflow")) { - softwareVersions.Workflow.remove("Nextflow") - } - return softwareVersions - } -} diff --git a/tests/mutect2.nf.test b/tests/mutect2.nf.test new file mode 100644 index 0000000..4e63875 --- /dev/null +++ b/tests/mutect2.nf.test @@ -0,0 +1,37 @@ 
+nextflow_pipeline { + + name "Test MUTECT2_PON" + script "main.nf" + tag "MUTECT2" + + test("Run MUTECT2 test") { + + when { + params { + outdir = "$outputDir" + tools = 'mutect2' + mutect2_pon_name = 'test' + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/mutect2.nf.test.snap b/tests/mutect2.nf.test.snap new file mode 100644 index 0000000..c9c5f88 --- /dev/null +++ b/tests/mutect2.nf.test.snap @@ -0,0 +1,383 @@ +{ + "Run MUTECT2 test": { + "content": [ + 5, + { + "GATK4_CREATESOMATICPANELOFNORMALS": { + "gatk4": "4.5.0.0" + }, + "GATK4_GENOMICSDBIMPORT": { + "gatk4": "4.5.0.0" + }, + "GATK4_MUTECT2": { + "gatk4": "4.5.0.0" + }, + "Workflow": { + "nf-core/createpanelrefs": "v1.0dev" + } + }, + [ + "gatk4", + "gatk4/sample1.vcf.gz", + "gatk4/sample1.vcf.gz.stats", + "gatk4/sample1.vcf.gz.tbi", + "gatk4/sample2.vcf.gz", + "gatk4/sample2.vcf.gz.stats", + "gatk4/sample2.vcf.gz.tbi", + "gatk4/test", + "gatk4/test.vcf.gz", + "gatk4/test.vcf.gz.tbi", + "gatk4/test/__tiledb_workspace.tdb", + "gatk4/test/callset.json", + "gatk4/test/chr21$2$23354000", + "gatk4/test/chr21$2$23354000/.__consolidation_lock", + 
"gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/AD.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/AD_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/AF.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/AF_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/ALT.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/ALT_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/AS_SB_TABLE.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/AS_SB_TABLE_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/AS_UNIQ_ALT_READ_COUNT.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/AS_UNIQ_ALT_READ_COUNT_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/CONTQ.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/DP.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/DP_FORMAT.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/ECNT.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/END.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/F1R2.tdb", + 
"gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/F1R2_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/F2R1.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/F2R1_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/FAD.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/FAD_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/FILTER.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/FILTER_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/GERMQ.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/GQ.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/GT.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/GT_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/ID.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/ID_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/MBQ.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/MBQ_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/MFRL.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/MFRL_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/MMQ.tdb", 
+ "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/MMQ_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/MPOS.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/MPOS_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/NALOD.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/NALOD_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/NCount.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/NLOD.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/NLOD_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/OCM.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/PGT.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/PGT_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/PID.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/PID_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/PL.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/PL_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/PON.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/POPAF.tdb", + 
"gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/POPAF_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/PS.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/QUAL.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/REF.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/REF_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/ROQ.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/RPA.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/RPA_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/RU.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/RU_var.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/SB.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/SEQQ.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/STR.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/STRANDQ.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/STRQ.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/TLOD.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/TLOD_var.tdb", + 
"gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/__book_keeping.tdb.gz", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/__coords.tdb", + "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/__tiledb_fragment.tdb", + "gatk4/test/chr21$2$23354000/__array_schema.tdb", + "gatk4/test/chr21$2$23354000/genomicsdb_meta_dir", + "gatk4/test/chr21$2$23354000/genomicsdb_meta_dir/genomicsdb_column_bounds.json", + "gatk4/test/chr21$2$23354000/genomicsdb_meta_dir/genomicsdb_meta_2fa49a66-d70b-472f-b420-a0eca92715ab.json", + "gatk4/test/chr21$24132500$24910998", + "gatk4/test/chr21$24132500$24910998/.__consolidation_lock", + "gatk4/test/chr21$24132500$24910998/__array_schema.tdb", + "gatk4/test/chr21$24132500$24910998/genomicsdb_meta_dir", + "gatk4/test/chr21$24132500$24910998/genomicsdb_meta_dir/genomicsdb_column_bounds.json", + "gatk4/test/chr21$24132500$24910998/genomicsdb_meta_dir/genomicsdb_meta_d23be0e8-765f-4373-887f-74a701b5d7c1.json", + "gatk4/test/chr21$25689498$46709983", + "gatk4/test/chr21$25689498$46709983/.__consolidation_lock", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/AD.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/AD_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/AF.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/AF_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/ALT.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/ALT_var.tdb", 
+ "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/AS_SB_TABLE.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/AS_SB_TABLE_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/AS_UNIQ_ALT_READ_COUNT.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/AS_UNIQ_ALT_READ_COUNT_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/CONTQ.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/DP.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/DP_FORMAT.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/ECNT.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/END.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/F1R2.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/F1R2_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/F2R1.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/F2R1_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/FAD.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/FAD_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/FILTER.tdb", + 
"gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/FILTER_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/GERMQ.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/GQ.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/GT.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/GT_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/ID.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/ID_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/MBQ.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/MBQ_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/MFRL.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/MFRL_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/MMQ.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/MMQ_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/MPOS.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/MPOS_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/NALOD.tdb", + 
"gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/NALOD_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/NCount.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/NLOD.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/NLOD_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/OCM.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/PGT.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/PGT_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/PID.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/PID_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/PL.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/PL_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/PON.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/POPAF.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/POPAF_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/PS.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/QUAL.tdb", + 
"gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/REF.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/REF_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/ROQ.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/RPA.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/RPA_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/RU.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/RU_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/SB.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/SEQQ.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/STR.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/STRANDQ.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/STRQ.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/TLOD.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/TLOD_var.tdb", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/__book_keeping.tdb.gz", + "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/__coords.tdb", + 
"gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/__tiledb_fragment.tdb", + "gatk4/test/chr21$25689498$46709983/__array_schema.tdb", + "gatk4/test/chr21$25689498$46709983/genomicsdb_meta_dir", + "gatk4/test/chr21$25689498$46709983/genomicsdb_meta_dir/genomicsdb_column_bounds.json", + "gatk4/test/chr21$25689498$46709983/genomicsdb_meta_dir/genomicsdb_meta_fbfd9118-39ee-4677-b032-6b3d1bd1112d.json", + "gatk4/test/vcfheader.vcf", + "gatk4/test/vidmap.json", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml" + ], + [ + "sample1.vcf.gz:md5,a63db49bc7baa4be03a628874d84253c", + "sample1.vcf.gz.stats:md5,b569ce66bbffe9588b3d221e821023ee", + "sample1.vcf.gz.tbi:md5,36840415631bb7c34020226e52f3d6e0", + "sample2.vcf.gz:md5,97926f56dd5658ee3919c6519d44fea7", + "sample2.vcf.gz.stats:md5,76f749c53212d72e98801f6030fbf8a6", + "sample2.vcf.gz.tbi:md5,4d590889c2dcd94f34f299fe332e7ace", + "test.vcf.gz:md5,114f52209d7eb6e3535a9e919c3ef750", + "test.vcf.gz.tbi:md5,16cd8d8eef807e56e7fa07511e770fc4", + "__tiledb_workspace.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "callset.json:md5,9123ad4a3154d434f043bf33f620fdc0", + ".__consolidation_lock:md5,d41d8cd98f00b204e9800998ecf8427e", + "AD.tdb:md5,25e5ccdc9d8c69b8c15c8df2d1ed3d9b", + "AD_var.tdb:md5,c6298c72157f5eef097c8d2ec2d8d69b", + "AF.tdb:md5,5cbd55d3cd8d16abc21966594ce57fc3", + "AF_var.tdb:md5,198d9f4aa5fe6317fe0b95ee7150f9e2", + "ALT.tdb:md5,0c7adeac8eb9f5e5ad8d2f329d9724d8", + "ALT_var.tdb:md5,d5e686faf8ef044da6fbd8bafefa2112", + "AS_SB_TABLE.tdb:md5,fb1bb3202a5edd9476715e6104cf5efb", + "AS_SB_TABLE_var.tdb:md5,ce1fbe64dd090f27e4c6cd0797f736e6", + 
"AS_UNIQ_ALT_READ_COUNT.tdb:md5,f0f251db348182d0e1c0b90c9c4b3a26", + "AS_UNIQ_ALT_READ_COUNT_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "CONTQ.tdb:md5,50d93ee3e417eb4fa8fc5cfa2f6d5e7a", + "DP.tdb:md5,cefb507760e66eea7ae9177f0621f6d6", + "DP_FORMAT.tdb:md5,93fd90800abe7fb9f801d3b6f25c66ee", + "ECNT.tdb:md5,12a5e5d13ac7d5690f8dff812c2bc016", + "END.tdb:md5,e599ed1cff39dbb8cee8b4e1e6e97225", + "F1R2.tdb:md5,25e5ccdc9d8c69b8c15c8df2d1ed3d9b", + "F1R2_var.tdb:md5,1ff77c8ab25de51242af2e5f817d95c9", + "F2R1.tdb:md5,25e5ccdc9d8c69b8c15c8df2d1ed3d9b", + "F2R1_var.tdb:md5,f0f251db348182d0e1c0b90c9c4b3a26", + "FAD.tdb:md5,25e5ccdc9d8c69b8c15c8df2d1ed3d9b", + "FAD_var.tdb:md5,bb4669be39cbaf3b0317ae6c149d9a40", + "FILTER.tdb:md5,f0f251db348182d0e1c0b90c9c4b3a26", + "FILTER_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "GERMQ.tdb:md5,79096811e8da4fe8e2d6222bab0da84c", + "GQ.tdb:md5,79096811e8da4fe8e2d6222bab0da84c", + "GT.tdb:md5,4f6ef31d1b6c72de7b57676f7a1a137b", + "GT_var.tdb:md5,d47feb14db80563cb02c0d555c6f5fcb", + "ID.tdb:md5,f0f251db348182d0e1c0b90c9c4b3a26", + "ID_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "MBQ.tdb:md5,25e5ccdc9d8c69b8c15c8df2d1ed3d9b", + "MBQ_var.tdb:md5,f7c52efa68a18663e34634328e1cbdc7", + "MFRL.tdb:md5,25e5ccdc9d8c69b8c15c8df2d1ed3d9b", + "MFRL_var.tdb:md5,c2cdfeeda7b925527f0339e6d5c17260", + "MMQ.tdb:md5,25e5ccdc9d8c69b8c15c8df2d1ed3d9b", + "MMQ_var.tdb:md5,51154b8d2d36ffa27cd4b0c9e3ae7a5b", + "MPOS.tdb:md5,5cbd55d3cd8d16abc21966594ce57fc3", + "MPOS_var.tdb:md5,7e6ef7060c1bb2ee518b9fe44c12f347", + "NALOD.tdb:md5,f0f251db348182d0e1c0b90c9c4b3a26", + "NALOD_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "NCount.tdb:md5,79096811e8da4fe8e2d6222bab0da84c", + "NLOD.tdb:md5,f0f251db348182d0e1c0b90c9c4b3a26", + "NLOD_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "OCM.tdb:md5,79096811e8da4fe8e2d6222bab0da84c", + "PGT.tdb:md5,05d194cf0ffc4996654e0b8c8a8eb398", + "PGT_var.tdb:md5,73bfb9313cd44680edd7a04d71ca9fce", + 
"PID.tdb:md5,23af41fb82e6603db19601b0be66493a", + "PID_var.tdb:md5,eb3a0ddfbb1dab3ce49ab77cfa7f99fc", + "PL.tdb:md5,f0f251db348182d0e1c0b90c9c4b3a26", + "PL_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "PON.tdb:md5,e575419bd56e92436577ff6d3e4111f0", + "POPAF.tdb:md5,5cbd55d3cd8d16abc21966594ce57fc3", + "POPAF_var.tdb:md5,cd28781617a019cf46ad8e1038da88c7", + "PS.tdb:md5,d8cf85d78d219a031776f6f515056aec", + "QUAL.tdb:md5,50d93ee3e417eb4fa8fc5cfa2f6d5e7a", + "REF.tdb:md5,83700bfc6d35d223e06a74b058187930", + "REF_var.tdb:md5,4d6ea0c8274a3dd075c6c2a6bbf3f677", + "ROQ.tdb:md5,50d93ee3e417eb4fa8fc5cfa2f6d5e7a", + "RPA.tdb:md5,b5f8bdcaa40febf3dd0e060a3970b837", + "RPA_var.tdb:md5,76f38604002ea9636ec5ffba2d7bf944", + "RU.tdb:md5,835980cee5a0b7ef97b8ecba32c34e22", + "RU_var.tdb:md5,6a93cc1625c89597aded01d38fa055e1", + "SB.tdb:md5,e543301811b472ebd2b09768d730f3fd", + "SEQQ.tdb:md5,79096811e8da4fe8e2d6222bab0da84c", + "STR.tdb:md5,71d5aef955aad96bf10985b87c95942b", + "STRANDQ.tdb:md5,79096811e8da4fe8e2d6222bab0da84c", + "STRQ.tdb:md5,79096811e8da4fe8e2d6222bab0da84c", + "TLOD.tdb:md5,5cbd55d3cd8d16abc21966594ce57fc3", + "TLOD_var.tdb:md5,09be85fd8c5e723f194ac0943c534b0d", + "__book_keeping.tdb.gz:md5,7668f7d0db0c302012df3474906c9925", + "__coords.tdb:md5,1cc3f9ba8c908a44b40a98badfff61c4", + "__tiledb_fragment.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "__array_schema.tdb:md5,d983ef3d226f3c38f68bc4f32ecda33c", + "genomicsdb_column_bounds.json:md5,fbd648f7064cb9a10b3b32a7cae34c9a", + "genomicsdb_meta_2fa49a66-d70b-472f-b420-a0eca92715ab.json:md5,d65998127173145d94d773e3350881e5", + ".__consolidation_lock:md5,d41d8cd98f00b204e9800998ecf8427e", + "__array_schema.tdb:md5,b9993caa8b3d41d059b10ada5101d9e8", + "genomicsdb_column_bounds.json:md5,e1122df80f0b75d9103e6616ed52b9ee", + "genomicsdb_meta_d23be0e8-765f-4373-887f-74a701b5d7c1.json:md5,d65998127173145d94d773e3350881e5", + ".__consolidation_lock:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"AD.tdb:md5,d520808c95b3d0ee9847e7923ebaaa6c", + "AD_var.tdb:md5,e90e2bc5a01648a3e07580b2034c44c6", + "AF.tdb:md5,89f24fce2fe1ae768a4f87f6c5644dc4", + "AF_var.tdb:md5,def80c5a7585274c98a766295bfedfe7", + "ALT.tdb:md5,ba6115e1ffe49ff75479308f32db93d6", + "ALT_var.tdb:md5,2a40741d36c3bc86c2f5f367f767cfd7", + "AS_SB_TABLE.tdb:md5,72d1f4116d2cbc75fa99b253d26d4ad9", + "AS_SB_TABLE_var.tdb:md5,c752faee89401485bc1fd70556b7d4f6", + "AS_UNIQ_ALT_READ_COUNT.tdb:md5,59eec9e1c659b9a126a5b23271d964a5", + "AS_UNIQ_ALT_READ_COUNT_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "CONTQ.tdb:md5,bdc87df1ac52acb83558b0f32fa08879", + "DP.tdb:md5,30b50605950fad37342590dc9ffdb11a", + "DP_FORMAT.tdb:md5,67d55b077b5e29894448d9b69d1fff58", + "ECNT.tdb:md5,7cf820a3342d7b52f0d75d5a0b15ed64", + "END.tdb:md5,27c5810d48524cce702f3b7ce3515ea5", + "F1R2.tdb:md5,d520808c95b3d0ee9847e7923ebaaa6c", + "F1R2_var.tdb:md5,77b8e09f76d9a09d9c46e6e116af82d9", + "F2R1.tdb:md5,d520808c95b3d0ee9847e7923ebaaa6c", + "F2R1_var.tdb:md5,59eec9e1c659b9a126a5b23271d964a5", + "FAD.tdb:md5,d520808c95b3d0ee9847e7923ebaaa6c", + "FAD_var.tdb:md5,48af683eafcb66c8dad9951ffe88d04a", + "FILTER.tdb:md5,59eec9e1c659b9a126a5b23271d964a5", + "FILTER_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "GERMQ.tdb:md5,17ee8dd46c227a6c999de499e46327ca", + "GQ.tdb:md5,17ee8dd46c227a6c999de499e46327ca", + "GT.tdb:md5,8968e4582af714b67f8dceb07e8991ca", + "GT_var.tdb:md5,4ecf12b42be9b68badebb1c690c0809e", + "ID.tdb:md5,59eec9e1c659b9a126a5b23271d964a5", + "ID_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "MBQ.tdb:md5,d520808c95b3d0ee9847e7923ebaaa6c", + "MBQ_var.tdb:md5,eb85c62a51886544118df9dfaa5045c3", + "MFRL.tdb:md5,d520808c95b3d0ee9847e7923ebaaa6c", + "MFRL_var.tdb:md5,c1bf3db7ef8dd15836dec497574d778f", + "MMQ.tdb:md5,d520808c95b3d0ee9847e7923ebaaa6c", + "MMQ_var.tdb:md5,8f266ac111f437dd74eaeb80f054d3ed", + "MPOS.tdb:md5,89f24fce2fe1ae768a4f87f6c5644dc4", + "MPOS_var.tdb:md5,d330fe7e1921b80da418689d1dbac682", + 
"NALOD.tdb:md5,59eec9e1c659b9a126a5b23271d964a5", + "NALOD_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "NCount.tdb:md5,17ee8dd46c227a6c999de499e46327ca", + "NLOD.tdb:md5,59eec9e1c659b9a126a5b23271d964a5", + "NLOD_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "OCM.tdb:md5,17ee8dd46c227a6c999de499e46327ca", + "PGT.tdb:md5,1c9fcbab7490fde6bf8f1c6c98dd6fe1", + "PGT_var.tdb:md5,b23eb2be3856f3c8e98032041a19d135", + "PID.tdb:md5,9f7589ebbef726f2d4bb518ed2fce1fe", + "PID_var.tdb:md5,47bacc6fe0f1b318747dc56ebb4d6e3e", + "PL.tdb:md5,59eec9e1c659b9a126a5b23271d964a5", + "PL_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "PON.tdb:md5,7915e8a208af602b1f603ad88ca0613b", + "POPAF.tdb:md5,89f24fce2fe1ae768a4f87f6c5644dc4", + "POPAF_var.tdb:md5,23f91f0f95fe5fce57c2b3ae5027f196", + "PS.tdb:md5,e6ccd6a8ff224f0780b88a533489cefc", + "QUAL.tdb:md5,bdc87df1ac52acb83558b0f32fa08879", + "REF.tdb:md5,c4dedc9bd820cce6afb9558bfb5217a4", + "REF_var.tdb:md5,963bf8052cc72f58a4e9247d476660be", + "ROQ.tdb:md5,bdc87df1ac52acb83558b0f32fa08879", + "RPA.tdb:md5,0b5b1b1ec7a46a2c2ac451de952440f9", + "RPA_var.tdb:md5,0e3ab36d395525c86e4122a613ca2e9c", + "RU.tdb:md5,80e670397dc61b8d4f65dd148758cbcf", + "RU_var.tdb:md5,e856786f8d55b52e3406d8f4f31de900", + "SB.tdb:md5,f13d419074793d70c6c66305d56235e6", + "SEQQ.tdb:md5,17ee8dd46c227a6c999de499e46327ca", + "STR.tdb:md5,da41a617915015415f8b8a5499fa18b0", + "STRANDQ.tdb:md5,17ee8dd46c227a6c999de499e46327ca", + "STRQ.tdb:md5,17ee8dd46c227a6c999de499e46327ca", + "TLOD.tdb:md5,89f24fce2fe1ae768a4f87f6c5644dc4", + "TLOD_var.tdb:md5,0f73e670bb2aca83b861d83cd908ae3a", + "__book_keeping.tdb.gz:md5,7f2bd5c335cf117ff7c0bf13d38f68e3", + "__coords.tdb:md5,78c8fb3250f31495f89a96047ff1f8ac", + "__tiledb_fragment.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", + "__array_schema.tdb:md5,f623aa15e93722a5ab35423bc60adb74", + "genomicsdb_column_bounds.json:md5,41f164432abe9a6d6c559d90e9a7cabc", + 
"genomicsdb_meta_fbfd9118-39ee-4677-b032-6b3d1bd1112d.json:md5,d65998127173145d94d773e3350881e5", + "vcfheader.vcf:md5,3eed5f181c05191fe5f43cfb64a255f3", + "vidmap.json:md5,bdd4857ec33a3a7a01211bfd21037a7a", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-11T11:16:59.841003" + } +} \ No newline at end of file diff --git a/tests/pipeline/cnvkit.nf.test b/tests/pipeline/cnvkit.nf.test deleted file mode 100644 index 115e5b9..0000000 --- a/tests/pipeline/cnvkit.nf.test +++ /dev/null @@ -1,26 +0,0 @@ -nextflow_pipeline { - - name "Test CNVKIT_BATCH" - script "main.nf" - tag "cnvkit" - - test("Run cnvkit test") { - - when { - params { - outdir = "$outputDir" - tools = 'cnvkit' - validationSchemaIgnoreParams = 'genomes,baseDir,base-dir,outputDir,output-dir' - } - } - - then { - assertAll( - { assert workflow.success }, - { assert new File("$outputDir/multiqc/").exists() }, - { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, - { assert snapshot(path("$outputDir/reference/cnvkit/").list()).match("cnvkit") } - ) - } - } -} diff --git a/tests/pipeline/cnvkit.nf.test.snap b/tests/pipeline/cnvkit.nf.test.snap deleted file mode 100644 index 15ffc8e..0000000 --- a/tests/pipeline/cnvkit.nf.test.snap +++ /dev/null @@ -1,26 +0,0 @@ -{ - "software_versions": { - "content": [ - "{CNVKIT_BATCH={cnvkit=0.9.10}, Workflow={nf-core/createpanelrefs=v1.0dev}}" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-02-21T12:34:52.978702536" - }, - "cnvkit": { - "content": [ - "panel.cnn:md5,68028cd2b4e0fc4489bf5bfd0a73440f", - "test.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", - "test.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,0067cc3a0e479b23ab3bf056cead31b4", - 
"test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", - "test2.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,1e1012812eb893afd931485cb760294e" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-11T16:44:31.390826495" - } -} \ No newline at end of file diff --git a/tests/pipeline/default.nf.test b/tests/pipeline/default.nf.test deleted file mode 100644 index 4d24bf1..0000000 --- a/tests/pipeline/default.nf.test +++ /dev/null @@ -1,25 +0,0 @@ -nextflow_pipeline { - - name "Test pipeline" - script "main.nf" - tag "default" - - test("Run default test") { - - when { - params { - outdir = "$outputDir" - validationSchemaIgnoreParams = 'genomes,baseDir,base-dir,outputDir,output-dir' - } - } - - then { - assertAll( - { assert workflow.success }, - { assert new File("$outputDir/multiqc/").exists() }, - { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, - { assert snapshot(path("$outputDir/reference/cnvkit/").list()).match("cnvkit") } - ) - } - } -} diff --git a/tests/pipeline/default.nf.test.snap b/tests/pipeline/default.nf.test.snap deleted file mode 100644 index 4d21008..0000000 --- a/tests/pipeline/default.nf.test.snap +++ /dev/null @@ -1,26 +0,0 @@ -{ - "software_versions": { - "content": [ - "{CNVKIT_BATCH={cnvkit=0.9.10}, Workflow={nf-core/createpanelrefs=v1.0dev}}" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.02.0" - }, - "timestamp": "2024-02-21T12:37:23.523857103" - }, - "cnvkit": { - "content": [ - "panel.cnn:md5,68028cd2b4e0fc4489bf5bfd0a73440f", - "test.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", - "test.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,0067cc3a0e479b23ab3bf056cead31b4", - "test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", - 
"test2.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,1e1012812eb893afd931485cb760294e" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.02.0" - }, - "timestamp": "2024-04-03T16:55:32.197815" - } -} \ No newline at end of file diff --git a/tests/pipeline/gens_pon.nf.test b/tests/pipeline/gens_pon.nf.test deleted file mode 100644 index 03276be..0000000 --- a/tests/pipeline/gens_pon.nf.test +++ /dev/null @@ -1,56 +0,0 @@ -nextflow_workflow { - - name "Test Workflow GENS_PON" - script "subworkflows/local/gens_pon.nf" - workflow "GENS_PON" - tag "gens" - config "./gens_pon.config" - - test("Run gens test") { - - when { - params { - outdir = "$outputDir" - gens_readcount_format = "TSV" - gens_bin_length = 100 - tools = 'gens' - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" - fai = null - dict = null - gens_pon_name = 'gens_pon' - } - workflow { - """ - input[0] = Channel.empty() - input[1] = Channel.empty() - input[2] = Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() - input[3] = Channel.of( - [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"),[]], - [[ id:'test2' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam"),[]]) - input[4] = params.gens_pon_name - """ - } - } - - then { - assert workflow.success - assert workflow.trace.succeeded().size() == 8 - assert workflow.trace.failed().size() == 0 - - assert path("$outputDir/gens_pon/references/genome.dict").toFile().isFile() - assert path("$outputDir/gens_pon/references/genome.fasta.fai").toFile().isFile() - assert path("$outputDir/gens_pon/createreadcountpanelofnormals/gens_pon.hdf5").toFile().isFile() - - def expected = path("$baseDir/tests/test_assets/gens_software_versions.yaml").yaml.collect() - def 
observed_list = [] - def observed = workflow.out.versions.collect {f -> path(f).yaml.entrySet()} - observed.stream() - .forEach(observed_list::addAll) - - assertContainsInAnyOrder(expected, observed_list) - - } - - } - -} diff --git a/tests/pipeline/germlinecnvcaller_cohort.nf.test b/tests/pipeline/germlinecnvcaller_cohort.nf.test deleted file mode 100644 index 0dedd87..0000000 --- a/tests/pipeline/germlinecnvcaller_cohort.nf.test +++ /dev/null @@ -1,64 +0,0 @@ -nextflow_workflow { - - name "Test Workflow GERMLINECNVCALLER_COHORT" - script "subworkflows/local/germlinecnvcaller_cohort.nf" - workflow "GERMLINECNVCALLER_COHORT" - tag "germlinecnvcaller" - config "tests/pipeline/germlinecnvcaller_cohort.config" - - test("Run germlinecnvcaller test") { - - when { - params { - outdir = "$outputDir" - tools = 'germlinecnvcaller' - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" - fai = null - dict = null - gcnv_scatter_content = 2 - gcnv_model_name = 'cohort' - gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" - } - workflow { - """ - input[0] = Channel.empty() - input[1] = Channel.empty() - input[2] = Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() - input[3] = Channel.of( - [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"),[]], - [[ id:'test2' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam"),[]]) - input[4] = Channel.fromPath(params.gcnv_ploidy_priors) - input[5] = Channel.value([[:],[]]) - input[6] = Channel.value([[:],[]]) - input[7] = Channel.value([[:],[]]) - input[8] = Channel.value([[:],[]]) - input[9] = Channel.value([[:],[]]) - input[10] = 
Channel.value([[:],[]]) - input[11] = params.gcnv_model_name - """ - } - } - - then { - assert workflow.success - assert workflow.trace.succeeded().size() == 13 - assert workflow.trace.failed().size() == 0 - - assert path("$outputDir/germlinecnvcaller/references/genome.dict").toFile().isFile() - assert path("$outputDir/germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model").toFile().isDirectory() - assert path("$outputDir/germlinecnvcaller/determinegermlinecontigploidy/cohort-model").toFile().isDirectory() - - - def expected = path("$baseDir/tests/test_assets/germlinecnvcaller_software_versions.yaml").yaml.collect() - def observed_list = [] - def observed = workflow.out.versions.collect {f -> path(f).yaml.entrySet()} - observed.stream() - .forEach(observed_list::addAll) - - assertContainsInAnyOrder(expected, observed_list) - - } - - } - -} diff --git a/tests/pipeline/mutect2.nf.test b/tests/pipeline/mutect2.nf.test deleted file mode 100644 index 767e0f9..0000000 --- a/tests/pipeline/mutect2.nf.test +++ /dev/null @@ -1,26 +0,0 @@ -nextflow_pipeline { - - name "Test MUTECT2_PON" - script "main.nf" - tag "MUTECT2" - - test("Run MUTECT2 test") { - - when { - params { - outdir = "$outputDir" - tools = 'mutect2' - mutect2_pon_name = 'test' - validationSchemaIgnoreParams = 'genomes,baseDir,base-dir,outputDir,output-dir' - } - } - - then { - assertAll( - { assert workflow.success }, - { assert new File("$outputDir/multiqc/").exists() }, - { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") } - ) - } - } -} diff --git a/tests/pipeline/mutect2.nf.test.snap b/tests/pipeline/mutect2.nf.test.snap deleted file mode 100644 index 95bd5fc..0000000 --- a/tests/pipeline/mutect2.nf.test.snap +++ /dev/null @@ -1,12 +0,0 @@ -{ - "software_versions": { - "content": [ - "{GATK4_CREATESOMATICPANELOFNORMALS={gatk4=4.5.0.0}, GATK4_GENOMICSDBIMPORT={gatk4=4.5.0.0}, GATK4_MUTECT2={gatk4=4.5.0.0}, Workflow={nf-core/createpanelrefs=v1.0dev}}" - ], - "meta": 
{ - "nf-test": "0.8.4", - "nextflow": "24.02.0" - }, - "timestamp": "2024-04-03T17:06:08.093342" - } -} \ No newline at end of file diff --git a/tests/test_assets/gens_software_versions.yaml b/tests/test_assets/gens_software_versions.yaml deleted file mode 100644 index b9adcb7..0000000 --- a/tests/test_assets/gens_software_versions.yaml +++ /dev/null @@ -1,12 +0,0 @@ -"GENS_PON:SAMTOOLS_INDEX": - samtools: 1.20 -"GENS_PON:GATK4_COLLECTREADCOUNTS": - gatk4: 4.5.0.0 -"GENS_PON:SAMTOOLS_FAIDX": - samtools: 1.20 -"GENS_PON:GATK4_PREPROCESSINTERVALS": - gatk4: 4.5.0.0 -"GENS_PON:PICARD_CREATESEQUENCEDICTIONARY": - picard: 3.1.1 -"GENS_PON:GATK4_CREATEREADCOUNTPANELOFNORMALS": - gatk4: 4.5.0.0 diff --git a/tests/test_assets/germlinecnvcaller_software_versions.yaml b/tests/test_assets/germlinecnvcaller_software_versions.yaml deleted file mode 100644 index cc07404..0000000 --- a/tests/test_assets/germlinecnvcaller_software_versions.yaml +++ /dev/null @@ -1,20 +0,0 @@ -"GERMLINECNVCALLER_COHORT:GATK4_FILTERINTERVALS": - gatk4: 4.5.0.0 -"GERMLINECNVCALLER_COHORT:GATK4_COLLECTREADCOUNTS": - gatk4: 4.5.0.0 -"GERMLINECNVCALLER_COHORT:GATK4_GERMLINECNVCALLER": - gatk4: 4.5.0.0 -"GERMLINECNVCALLER_COHORT:GATK4_DETERMINEGERMLINECONTIGPLOIDY": - gatk4: 4.5.0.0 -"GERMLINECNVCALLER_COHORT:GATK4_ANNOTATEINTERVALS": - gatk4: 4.5.0.0 -"GERMLINECNVCALLER_COHORT:GATK4_INTERVALLISTTOOLS": - gatk4: 4.5.0.0 -"GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS": - gatk4: 4.5.0.0 -"GERMLINECNVCALLER_COHORT:SAMTOOLS_FAIDX": - samtools: 1.20 -"GERMLINECNVCALLER_COHORT:SAMTOOLS_INDEX": - samtools: 1.20 -"GERMLINECNVCALLER_COHORT:PICARD_CREATESEQUENCEDICTIONARY": - picard: 3.1.1 diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 7627fc5..946347e 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -25,8 +25,8 @@ include { BAM_CREATE_SOM_PON_GATK } from '../subworkflows/nf-core/bam_create 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch' +include { MULTIQC } from '../modules/nf-core/multiqc' // Initialize file channels based on params, defined in the params.genomes[params.genome] scope ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName], dict]}.collect() @@ -162,7 +162,7 @@ workflow CREATEPANELREFS { softwareVersionsToYAML(ch_versions) .collectFile( storeDir: "${params.outdir}/pipeline_info", - name: 'nf_core_' + 'pipeline_software_' + 'mqc_' + 'versions.yml', + name: 'nf_core_ceatepanelrefs_software_mqc_versions.yml', sort: true, newLine: true ).set { ch_collated_versions } From 1a916c6dfa3c54dbcef9dc23c3588b3f9a66c565 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Oct 2024 11:25:28 +0200 Subject: [PATCH 130/234] update snapshot --- tests/default.nf.test.snap | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 1b6302e..6ca813f 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,5 +1,5 @@ { - "Run with profile test": { + "Run with profile test | default aka cnvkit": { "content": [ 2, { @@ -44,4 +44,4 @@ }, "timestamp": "2024-10-11T11:10:55.4912" } -} \ No newline at end of file +} From a8ab882b3c6a9af6738f3e1743887c6418f071c7 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Oct 2024 11:27:21 +0200 Subject: [PATCH 131/234] Just 4 shards for now --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bbff218..d0e1f94 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,7 +35,7 @@ jobs: filter: ["workflow", "function", "pipeline"] # filter: 
["process", "workflow", "function", "pipeline"] profile: ["conda", "docker", "singularity"] - shard: [1, 2, 3, 4, 5] + shard: [1, 2, 3, 4] isMaster: - ${{ github.base_ref == 'master' }} exclude: From da49c8025350fa09d86ae216fc75ddabafa87b05 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Oct 2024 13:11:41 +0200 Subject: [PATCH 132/234] improve mutect2 tests --- tests/.nftignore | 3 + tests/mutect2.nf.test | 2 +- tests/mutect2.nf.test.snap | 325 +------------------------------------ 3 files changed, 5 insertions(+), 325 deletions(-) diff --git a/tests/.nftignore b/tests/.nftignore index ec58fdc..e5170cd 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -1,3 +1,6 @@ +gatk4/*.{vcf.gz,vcf.gz.tbi} +gatk4/test/* +gatk4/test/** multiqc/multiqc_data/multiqc.log multiqc/multiqc_data/multiqc_data.json multiqc/multiqc_data/multiqc_general_stats.txt diff --git a/tests/mutect2.nf.test b/tests/mutect2.nf.test index 4e63875..61a62f6 100644 --- a/tests/mutect2.nf.test +++ b/tests/mutect2.nf.test @@ -16,7 +16,7 @@ nextflow_pipeline { then { // stable_name: All files + folders in ${params.outdir}/ with a stable name - def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'gatk4/test/**/*']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') assertAll( diff --git a/tests/mutect2.nf.test.snap b/tests/mutect2.nf.test.snap index c9c5f88..dd8e352 100644 --- a/tests/mutect2.nf.test.snap +++ b/tests/mutect2.nf.test.snap @@ -30,167 +30,8 @@ "gatk4/test/__tiledb_workspace.tdb", "gatk4/test/callset.json", "gatk4/test/chr21$2$23354000", - "gatk4/test/chr21$2$23354000/.__consolidation_lock", - 
"gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/AD.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/AD_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/AF.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/AF_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/ALT.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/ALT_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/AS_SB_TABLE.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/AS_SB_TABLE_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/AS_UNIQ_ALT_READ_COUNT.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/AS_UNIQ_ALT_READ_COUNT_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/CONTQ.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/DP.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/DP_FORMAT.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/ECNT.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/END.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/F1R2.tdb", - 
"gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/F1R2_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/F2R1.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/F2R1_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/FAD.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/FAD_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/FILTER.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/FILTER_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/GERMQ.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/GQ.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/GT.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/GT_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/ID.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/ID_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/MBQ.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/MBQ_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/MFRL.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/MFRL_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/MMQ.tdb", 
- "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/MMQ_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/MPOS.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/MPOS_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/NALOD.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/NALOD_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/NCount.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/NLOD.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/NLOD_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/OCM.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/PGT.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/PGT_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/PID.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/PID_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/PL.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/PL_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/PON.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/POPAF.tdb", - 
"gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/POPAF_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/PS.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/QUAL.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/REF.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/REF_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/ROQ.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/RPA.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/RPA_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/RU.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/RU_var.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/SB.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/SEQQ.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/STR.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/STRANDQ.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/STRQ.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/TLOD.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/TLOD_var.tdb", - 
"gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/__book_keeping.tdb.gz", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/__coords.tdb", - "gatk4/test/chr21$2$23354000/__6d81792d-3faf-4b0b-a83f-3970900ce0c3140170370176704_1728638204392/__tiledb_fragment.tdb", - "gatk4/test/chr21$2$23354000/__array_schema.tdb", - "gatk4/test/chr21$2$23354000/genomicsdb_meta_dir", - "gatk4/test/chr21$2$23354000/genomicsdb_meta_dir/genomicsdb_column_bounds.json", - "gatk4/test/chr21$2$23354000/genomicsdb_meta_dir/genomicsdb_meta_2fa49a66-d70b-472f-b420-a0eca92715ab.json", "gatk4/test/chr21$24132500$24910998", - "gatk4/test/chr21$24132500$24910998/.__consolidation_lock", - "gatk4/test/chr21$24132500$24910998/__array_schema.tdb", - "gatk4/test/chr21$24132500$24910998/genomicsdb_meta_dir", - "gatk4/test/chr21$24132500$24910998/genomicsdb_meta_dir/genomicsdb_column_bounds.json", - "gatk4/test/chr21$24132500$24910998/genomicsdb_meta_dir/genomicsdb_meta_d23be0e8-765f-4373-887f-74a701b5d7c1.json", "gatk4/test/chr21$25689498$46709983", - "gatk4/test/chr21$25689498$46709983/.__consolidation_lock", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/AD.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/AD_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/AF.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/AF_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/ALT.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/ALT_var.tdb", - 
"gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/AS_SB_TABLE.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/AS_SB_TABLE_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/AS_UNIQ_ALT_READ_COUNT.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/AS_UNIQ_ALT_READ_COUNT_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/CONTQ.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/DP.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/DP_FORMAT.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/ECNT.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/END.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/F1R2.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/F1R2_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/F2R1.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/F2R1_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/FAD.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/FAD_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/FILTER.tdb", - 
"gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/FILTER_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/GERMQ.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/GQ.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/GT.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/GT_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/ID.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/ID_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/MBQ.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/MBQ_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/MFRL.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/MFRL_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/MMQ.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/MMQ_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/MPOS.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/MPOS_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/NALOD.tdb", - 
"gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/NALOD_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/NCount.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/NLOD.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/NLOD_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/OCM.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/PGT.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/PGT_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/PID.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/PID_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/PL.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/PL_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/PON.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/POPAF.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/POPAF_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/PS.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/QUAL.tdb", - 
"gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/REF.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/REF_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/ROQ.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/RPA.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/RPA_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/RU.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/RU_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/SB.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/SEQQ.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/STR.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/STRANDQ.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/STRQ.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/TLOD.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/TLOD_var.tdb", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/__book_keeping.tdb.gz", - "gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/__coords.tdb", - 
"gatk4/test/chr21$25689498$46709983/__8ec66c56-124d-4e2a-8830-5b3e08932d1b140170370176704_1728638204900/__tiledb_fragment.tdb", - "gatk4/test/chr21$25689498$46709983/__array_schema.tdb", - "gatk4/test/chr21$25689498$46709983/genomicsdb_meta_dir", - "gatk4/test/chr21$25689498$46709983/genomicsdb_meta_dir/genomicsdb_column_bounds.json", - "gatk4/test/chr21$25689498$46709983/genomicsdb_meta_dir/genomicsdb_meta_fbfd9118-39ee-4677-b032-6b3d1bd1112d.json", "gatk4/test/vcfheader.vcf", "gatk4/test/vidmap.json", "multiqc", @@ -205,172 +46,8 @@ "pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml" ], [ - "sample1.vcf.gz:md5,a63db49bc7baa4be03a628874d84253c", "sample1.vcf.gz.stats:md5,b569ce66bbffe9588b3d221e821023ee", - "sample1.vcf.gz.tbi:md5,36840415631bb7c34020226e52f3d6e0", - "sample2.vcf.gz:md5,97926f56dd5658ee3919c6519d44fea7", "sample2.vcf.gz.stats:md5,76f749c53212d72e98801f6030fbf8a6", - "sample2.vcf.gz.tbi:md5,4d590889c2dcd94f34f299fe332e7ace", - "test.vcf.gz:md5,114f52209d7eb6e3535a9e919c3ef750", - "test.vcf.gz.tbi:md5,16cd8d8eef807e56e7fa07511e770fc4", - "__tiledb_workspace.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "callset.json:md5,9123ad4a3154d434f043bf33f620fdc0", - ".__consolidation_lock:md5,d41d8cd98f00b204e9800998ecf8427e", - "AD.tdb:md5,25e5ccdc9d8c69b8c15c8df2d1ed3d9b", - "AD_var.tdb:md5,c6298c72157f5eef097c8d2ec2d8d69b", - "AF.tdb:md5,5cbd55d3cd8d16abc21966594ce57fc3", - "AF_var.tdb:md5,198d9f4aa5fe6317fe0b95ee7150f9e2", - "ALT.tdb:md5,0c7adeac8eb9f5e5ad8d2f329d9724d8", - "ALT_var.tdb:md5,d5e686faf8ef044da6fbd8bafefa2112", - "AS_SB_TABLE.tdb:md5,fb1bb3202a5edd9476715e6104cf5efb", - "AS_SB_TABLE_var.tdb:md5,ce1fbe64dd090f27e4c6cd0797f736e6", - "AS_UNIQ_ALT_READ_COUNT.tdb:md5,f0f251db348182d0e1c0b90c9c4b3a26", - "AS_UNIQ_ALT_READ_COUNT_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "CONTQ.tdb:md5,50d93ee3e417eb4fa8fc5cfa2f6d5e7a", - "DP.tdb:md5,cefb507760e66eea7ae9177f0621f6d6", - "DP_FORMAT.tdb:md5,93fd90800abe7fb9f801d3b6f25c66ee", - 
"ECNT.tdb:md5,12a5e5d13ac7d5690f8dff812c2bc016", - "END.tdb:md5,e599ed1cff39dbb8cee8b4e1e6e97225", - "F1R2.tdb:md5,25e5ccdc9d8c69b8c15c8df2d1ed3d9b", - "F1R2_var.tdb:md5,1ff77c8ab25de51242af2e5f817d95c9", - "F2R1.tdb:md5,25e5ccdc9d8c69b8c15c8df2d1ed3d9b", - "F2R1_var.tdb:md5,f0f251db348182d0e1c0b90c9c4b3a26", - "FAD.tdb:md5,25e5ccdc9d8c69b8c15c8df2d1ed3d9b", - "FAD_var.tdb:md5,bb4669be39cbaf3b0317ae6c149d9a40", - "FILTER.tdb:md5,f0f251db348182d0e1c0b90c9c4b3a26", - "FILTER_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "GERMQ.tdb:md5,79096811e8da4fe8e2d6222bab0da84c", - "GQ.tdb:md5,79096811e8da4fe8e2d6222bab0da84c", - "GT.tdb:md5,4f6ef31d1b6c72de7b57676f7a1a137b", - "GT_var.tdb:md5,d47feb14db80563cb02c0d555c6f5fcb", - "ID.tdb:md5,f0f251db348182d0e1c0b90c9c4b3a26", - "ID_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "MBQ.tdb:md5,25e5ccdc9d8c69b8c15c8df2d1ed3d9b", - "MBQ_var.tdb:md5,f7c52efa68a18663e34634328e1cbdc7", - "MFRL.tdb:md5,25e5ccdc9d8c69b8c15c8df2d1ed3d9b", - "MFRL_var.tdb:md5,c2cdfeeda7b925527f0339e6d5c17260", - "MMQ.tdb:md5,25e5ccdc9d8c69b8c15c8df2d1ed3d9b", - "MMQ_var.tdb:md5,51154b8d2d36ffa27cd4b0c9e3ae7a5b", - "MPOS.tdb:md5,5cbd55d3cd8d16abc21966594ce57fc3", - "MPOS_var.tdb:md5,7e6ef7060c1bb2ee518b9fe44c12f347", - "NALOD.tdb:md5,f0f251db348182d0e1c0b90c9c4b3a26", - "NALOD_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "NCount.tdb:md5,79096811e8da4fe8e2d6222bab0da84c", - "NLOD.tdb:md5,f0f251db348182d0e1c0b90c9c4b3a26", - "NLOD_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "OCM.tdb:md5,79096811e8da4fe8e2d6222bab0da84c", - "PGT.tdb:md5,05d194cf0ffc4996654e0b8c8a8eb398", - "PGT_var.tdb:md5,73bfb9313cd44680edd7a04d71ca9fce", - "PID.tdb:md5,23af41fb82e6603db19601b0be66493a", - "PID_var.tdb:md5,eb3a0ddfbb1dab3ce49ab77cfa7f99fc", - "PL.tdb:md5,f0f251db348182d0e1c0b90c9c4b3a26", - "PL_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "PON.tdb:md5,e575419bd56e92436577ff6d3e4111f0", - "POPAF.tdb:md5,5cbd55d3cd8d16abc21966594ce57fc3", - 
"POPAF_var.tdb:md5,cd28781617a019cf46ad8e1038da88c7", - "PS.tdb:md5,d8cf85d78d219a031776f6f515056aec", - "QUAL.tdb:md5,50d93ee3e417eb4fa8fc5cfa2f6d5e7a", - "REF.tdb:md5,83700bfc6d35d223e06a74b058187930", - "REF_var.tdb:md5,4d6ea0c8274a3dd075c6c2a6bbf3f677", - "ROQ.tdb:md5,50d93ee3e417eb4fa8fc5cfa2f6d5e7a", - "RPA.tdb:md5,b5f8bdcaa40febf3dd0e060a3970b837", - "RPA_var.tdb:md5,76f38604002ea9636ec5ffba2d7bf944", - "RU.tdb:md5,835980cee5a0b7ef97b8ecba32c34e22", - "RU_var.tdb:md5,6a93cc1625c89597aded01d38fa055e1", - "SB.tdb:md5,e543301811b472ebd2b09768d730f3fd", - "SEQQ.tdb:md5,79096811e8da4fe8e2d6222bab0da84c", - "STR.tdb:md5,71d5aef955aad96bf10985b87c95942b", - "STRANDQ.tdb:md5,79096811e8da4fe8e2d6222bab0da84c", - "STRQ.tdb:md5,79096811e8da4fe8e2d6222bab0da84c", - "TLOD.tdb:md5,5cbd55d3cd8d16abc21966594ce57fc3", - "TLOD_var.tdb:md5,09be85fd8c5e723f194ac0943c534b0d", - "__book_keeping.tdb.gz:md5,7668f7d0db0c302012df3474906c9925", - "__coords.tdb:md5,1cc3f9ba8c908a44b40a98badfff61c4", - "__tiledb_fragment.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "__array_schema.tdb:md5,d983ef3d226f3c38f68bc4f32ecda33c", - "genomicsdb_column_bounds.json:md5,fbd648f7064cb9a10b3b32a7cae34c9a", - "genomicsdb_meta_2fa49a66-d70b-472f-b420-a0eca92715ab.json:md5,d65998127173145d94d773e3350881e5", - ".__consolidation_lock:md5,d41d8cd98f00b204e9800998ecf8427e", - "__array_schema.tdb:md5,b9993caa8b3d41d059b10ada5101d9e8", - "genomicsdb_column_bounds.json:md5,e1122df80f0b75d9103e6616ed52b9ee", - "genomicsdb_meta_d23be0e8-765f-4373-887f-74a701b5d7c1.json:md5,d65998127173145d94d773e3350881e5", - ".__consolidation_lock:md5,d41d8cd98f00b204e9800998ecf8427e", - "AD.tdb:md5,d520808c95b3d0ee9847e7923ebaaa6c", - "AD_var.tdb:md5,e90e2bc5a01648a3e07580b2034c44c6", - "AF.tdb:md5,89f24fce2fe1ae768a4f87f6c5644dc4", - "AF_var.tdb:md5,def80c5a7585274c98a766295bfedfe7", - "ALT.tdb:md5,ba6115e1ffe49ff75479308f32db93d6", - "ALT_var.tdb:md5,2a40741d36c3bc86c2f5f367f767cfd7", - 
"AS_SB_TABLE.tdb:md5,72d1f4116d2cbc75fa99b253d26d4ad9", - "AS_SB_TABLE_var.tdb:md5,c752faee89401485bc1fd70556b7d4f6", - "AS_UNIQ_ALT_READ_COUNT.tdb:md5,59eec9e1c659b9a126a5b23271d964a5", - "AS_UNIQ_ALT_READ_COUNT_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "CONTQ.tdb:md5,bdc87df1ac52acb83558b0f32fa08879", - "DP.tdb:md5,30b50605950fad37342590dc9ffdb11a", - "DP_FORMAT.tdb:md5,67d55b077b5e29894448d9b69d1fff58", - "ECNT.tdb:md5,7cf820a3342d7b52f0d75d5a0b15ed64", - "END.tdb:md5,27c5810d48524cce702f3b7ce3515ea5", - "F1R2.tdb:md5,d520808c95b3d0ee9847e7923ebaaa6c", - "F1R2_var.tdb:md5,77b8e09f76d9a09d9c46e6e116af82d9", - "F2R1.tdb:md5,d520808c95b3d0ee9847e7923ebaaa6c", - "F2R1_var.tdb:md5,59eec9e1c659b9a126a5b23271d964a5", - "FAD.tdb:md5,d520808c95b3d0ee9847e7923ebaaa6c", - "FAD_var.tdb:md5,48af683eafcb66c8dad9951ffe88d04a", - "FILTER.tdb:md5,59eec9e1c659b9a126a5b23271d964a5", - "FILTER_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "GERMQ.tdb:md5,17ee8dd46c227a6c999de499e46327ca", - "GQ.tdb:md5,17ee8dd46c227a6c999de499e46327ca", - "GT.tdb:md5,8968e4582af714b67f8dceb07e8991ca", - "GT_var.tdb:md5,4ecf12b42be9b68badebb1c690c0809e", - "ID.tdb:md5,59eec9e1c659b9a126a5b23271d964a5", - "ID_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "MBQ.tdb:md5,d520808c95b3d0ee9847e7923ebaaa6c", - "MBQ_var.tdb:md5,eb85c62a51886544118df9dfaa5045c3", - "MFRL.tdb:md5,d520808c95b3d0ee9847e7923ebaaa6c", - "MFRL_var.tdb:md5,c1bf3db7ef8dd15836dec497574d778f", - "MMQ.tdb:md5,d520808c95b3d0ee9847e7923ebaaa6c", - "MMQ_var.tdb:md5,8f266ac111f437dd74eaeb80f054d3ed", - "MPOS.tdb:md5,89f24fce2fe1ae768a4f87f6c5644dc4", - "MPOS_var.tdb:md5,d330fe7e1921b80da418689d1dbac682", - "NALOD.tdb:md5,59eec9e1c659b9a126a5b23271d964a5", - "NALOD_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "NCount.tdb:md5,17ee8dd46c227a6c999de499e46327ca", - "NLOD.tdb:md5,59eec9e1c659b9a126a5b23271d964a5", - "NLOD_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "OCM.tdb:md5,17ee8dd46c227a6c999de499e46327ca", - 
"PGT.tdb:md5,1c9fcbab7490fde6bf8f1c6c98dd6fe1", - "PGT_var.tdb:md5,b23eb2be3856f3c8e98032041a19d135", - "PID.tdb:md5,9f7589ebbef726f2d4bb518ed2fce1fe", - "PID_var.tdb:md5,47bacc6fe0f1b318747dc56ebb4d6e3e", - "PL.tdb:md5,59eec9e1c659b9a126a5b23271d964a5", - "PL_var.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "PON.tdb:md5,7915e8a208af602b1f603ad88ca0613b", - "POPAF.tdb:md5,89f24fce2fe1ae768a4f87f6c5644dc4", - "POPAF_var.tdb:md5,23f91f0f95fe5fce57c2b3ae5027f196", - "PS.tdb:md5,e6ccd6a8ff224f0780b88a533489cefc", - "QUAL.tdb:md5,bdc87df1ac52acb83558b0f32fa08879", - "REF.tdb:md5,c4dedc9bd820cce6afb9558bfb5217a4", - "REF_var.tdb:md5,963bf8052cc72f58a4e9247d476660be", - "ROQ.tdb:md5,bdc87df1ac52acb83558b0f32fa08879", - "RPA.tdb:md5,0b5b1b1ec7a46a2c2ac451de952440f9", - "RPA_var.tdb:md5,0e3ab36d395525c86e4122a613ca2e9c", - "RU.tdb:md5,80e670397dc61b8d4f65dd148758cbcf", - "RU_var.tdb:md5,e856786f8d55b52e3406d8f4f31de900", - "SB.tdb:md5,f13d419074793d70c6c66305d56235e6", - "SEQQ.tdb:md5,17ee8dd46c227a6c999de499e46327ca", - "STR.tdb:md5,da41a617915015415f8b8a5499fa18b0", - "STRANDQ.tdb:md5,17ee8dd46c227a6c999de499e46327ca", - "STRQ.tdb:md5,17ee8dd46c227a6c999de499e46327ca", - "TLOD.tdb:md5,89f24fce2fe1ae768a4f87f6c5644dc4", - "TLOD_var.tdb:md5,0f73e670bb2aca83b861d83cd908ae3a", - "__book_keeping.tdb.gz:md5,7f2bd5c335cf117ff7c0bf13d38f68e3", - "__coords.tdb:md5,78c8fb3250f31495f89a96047ff1f8ac", - "__tiledb_fragment.tdb:md5,d41d8cd98f00b204e9800998ecf8427e", - "__array_schema.tdb:md5,f623aa15e93722a5ab35423bc60adb74", - "genomicsdb_column_bounds.json:md5,41f164432abe9a6d6c559d90e9a7cabc", - "genomicsdb_meta_fbfd9118-39ee-4677-b032-6b3d1bd1112d.json:md5,d65998127173145d94d773e3350881e5", - "vcfheader.vcf:md5,3eed5f181c05191fe5f43cfb64a255f3", - "vidmap.json:md5,bdd4857ec33a3a7a01211bfd21037a7a", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], @@ -378,6 +55,6 @@ "nf-test": "0.9.0", "nextflow": "24.09.0" }, - "timestamp": "2024-10-11T11:16:59.841003" + 
"timestamp": "2024-10-11T12:02:22.796491" } } \ No newline at end of file From 78d8befbe44ebcae790cafc7cd1399e174fc8de5 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Oct 2024 14:42:14 +0200 Subject: [PATCH 133/234] fix gens and germlinecnvcaller tests --- tests/csv/1.0.0/bam_sorted.csv | 3 + tests/gens_pon.nf.test | 3 + tests/gens_pon.nf.test.snap | 64 +++++ tests/germlinecnvcaller_cohort.nf.test | 3 + tests/germlinecnvcaller_cohort.nf.test.snap | 263 ++++++++++++++++++++ 5 files changed, 336 insertions(+) create mode 100644 tests/csv/1.0.0/bam_sorted.csv create mode 100644 tests/gens_pon.nf.test.snap create mode 100644 tests/germlinecnvcaller_cohort.nf.test.snap diff --git a/tests/csv/1.0.0/bam_sorted.csv b/tests/csv/1.0.0/bam_sorted.csv new file mode 100644 index 0000000..46428ed --- /dev/null +++ b/tests/csv/1.0.0/bam_sorted.csv @@ -0,0 +1,3 @@ +sample,bam,bai +sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai +sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai diff --git a/tests/gens_pon.nf.test b/tests/gens_pon.nf.test index 2a2ce10..6977905 100644 --- a/tests/gens_pon.nf.test +++ b/tests/gens_pon.nf.test @@ -10,10 +10,13 @@ nextflow_pipeline { when { params { + dict = null + fai = null fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" gens_bin_length = 100 gens_pon_name = 'gens_pon' gens_readcount_format = "TSV" + input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" outdir = "$outputDir" tools = 'gens' } diff --git 
a/tests/gens_pon.nf.test.snap b/tests/gens_pon.nf.test.snap new file mode 100644 index 0000000..9e1a3f0 --- /dev/null +++ b/tests/gens_pon.nf.test.snap @@ -0,0 +1,64 @@ +{ + "Run gens test": { + "content": [ + 7, + { + "GATK4_COLLECTREADCOUNTS": { + "gatk4": "4.5.0.0" + }, + "GATK4_CREATEREADCOUNTPANELOFNORMALS": { + "gatk4": "4.5.0.0" + }, + "GATK4_PREPROCESSINTERVALS": { + "gatk4": "4.5.0.0" + }, + "PICARD_CREATESEQUENCEDICTIONARY": { + "picard": "3.1.1" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.2 + }, + "Workflow": { + "nf-core/createpanelrefs": "v1.0dev" + } + }, + [ + "gens_pon", + "gens_pon/createreadcountpanelofnormals", + "gens_pon/createreadcountpanelofnormals/gens_pon.hdf5", + "gens_pon/intervals", + "gens_pon/intervals/genome.interval_list", + "gens_pon/readcounts", + "gens_pon/readcounts/sample1.tsv", + "gens_pon/readcounts/sample2.tsv", + "gens_pon/references", + "gens_pon/references/genome.dict", + "gens_pon/references/genome.fasta.fai", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml" + ], + [ + "gens_pon.hdf5:md5,31077d93155c1ffb0ba1904b5056386a", + "genome.interval_list:md5,aa0fc11d901dd9b3bbfd788f83d31800", + "sample1.tsv:md5,bfceab35109b04317f38f9cc1cde71ca", + "sample2.tsv:md5,7141d08cdc26f6057557be9e23ef4365", + "genome.dict:md5,07f6990366eb7835eeecae174ad3db6a", + "genome.fasta.fai:md5,3520cd30e1b100e55f578db9c855f685", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-11T14:12:35.334538" + } +} \ No newline at end of file diff --git a/tests/germlinecnvcaller_cohort.nf.test 
b/tests/germlinecnvcaller_cohort.nf.test index 612c9d1..ba9bd6d 100644 --- a/tests/germlinecnvcaller_cohort.nf.test +++ b/tests/germlinecnvcaller_cohort.nf.test @@ -10,10 +10,13 @@ nextflow_pipeline { when { params { + dict = null + fai = null fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" gcnv_model_name = 'cohort' gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" gcnv_scatter_content = 2 + input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" outdir = "$outputDir" tools = 'germlinecnvcaller' } diff --git a/tests/germlinecnvcaller_cohort.nf.test.snap b/tests/germlinecnvcaller_cohort.nf.test.snap new file mode 100644 index 0000000..040224d --- /dev/null +++ b/tests/germlinecnvcaller_cohort.nf.test.snap @@ -0,0 +1,263 @@ +{ + "Run germlinecnvcaller test": { + "content": [ + 12, + { + "GATK4_ANNOTATEINTERVALS": { + "gatk4": "4.5.0.0" + }, + "GATK4_COLLECTREADCOUNTS": { + "gatk4": "4.5.0.0" + }, + "GATK4_DETERMINEGERMLINECONTIGPLOIDY": { + "gatk4": "4.5.0.0" + }, + "GATK4_FILTERINTERVALS": { + "gatk4": "4.5.0.0" + }, + "GATK4_GERMLINECNVCALLER": { + "gatk4": "4.5.0.0" + }, + "GATK4_INTERVALLISTTOOLS": { + "gatk4": "4.5.0.0" + }, + "GATK4_PREPROCESSINTERVALS": { + "gatk4": "4.5.0.0" + }, + "PICARD_CREATESEQUENCEDICTIONARY": { + "picard": "3.1.1" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.2 + }, + "Workflow": { + "nf-core/createpanelrefs": "v1.0dev" + } + }, + [ + "germlinecnvcaller", + "germlinecnvcaller/determinegermlinecontigploidy", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/contig_ploidy_prior.tsv", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/gcnvkernel_version.json", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/interval_list.tsv", + 
"germlinecnvcaller/determinegermlinecontigploidy/cohort-model/mu_mean_bias_j_lowerbound__.tsv", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/mu_psi_j_log__.tsv", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/ploidy_config.json", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/std_mean_bias_j_lowerbound__.tsv", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/std_psi_j_log__.tsv", + "germlinecnvcaller/germlinecnvcaller", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/baseline_copy_number_t.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/log_c_emission_tc.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/log_q_c_tc.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/mu_denoised_copy_ratio_t.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/mu_psi_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/mu_read_depth_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/mu_z_su.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/sample_name.txt", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/std_denoised_copy_ratio_t.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/std_psi_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/std_read_depth_s_log__.tsv", + 
"germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_0/std_z_su.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/baseline_copy_number_t.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/log_c_emission_tc.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/log_q_c_tc.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/mu_denoised_copy_ratio_t.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/mu_psi_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/mu_read_depth_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/mu_z_su.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/sample_name.txt", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/std_denoised_copy_ratio_t.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/std_psi_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/std_read_depth_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/SAMPLE_1/std_z_su.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/calling_config.json", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/denoising_config.json", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/gcnvkernel_version.json", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-calls/interval_list.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model", + 
"germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/calling_config.json", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/denoising_config.json", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/gcnvkernel_version.json", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/interval_list.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/log_q_tau_tk.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/mu_W_tu.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/mu_ard_u_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/mu_log_mean_bias_t.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/mu_psi_t_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/std_W_tu.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/std_ard_u_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/std_log_mean_bias_t.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/std_psi_t_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/baseline_copy_number_t.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/log_c_emission_tc.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/log_q_c_tc.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/mu_denoised_copy_ratio_t.tsv", + 
"germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/mu_psi_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/mu_read_depth_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/mu_z_su.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/sample_name.txt", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/std_denoised_copy_ratio_t.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/std_psi_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/std_read_depth_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_0/std_z_su.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/baseline_copy_number_t.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/log_c_emission_tc.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/log_q_c_tc.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/mu_denoised_copy_ratio_t.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/mu_psi_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/mu_read_depth_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/mu_z_su.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/sample_name.txt", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/std_denoised_copy_ratio_t.tsv", + 
"germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/std_psi_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/std_read_depth_s_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/SAMPLE_1/std_z_su.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/calling_config.json", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/denoising_config.json", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/gcnvkernel_version.json", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-calls/interval_list.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/calling_config.json", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/denoising_config.json", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/gcnvkernel_version.json", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/interval_list.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/log_q_tau_tk.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/mu_W_tu.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/mu_ard_u_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/mu_log_mean_bias_t.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/mu_psi_t_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/std_W_tu.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/std_ard_u_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/std_log_mean_bias_t.tsv", + 
"germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/std_psi_t_log__.tsv", + "germlinecnvcaller/readcounts", + "germlinecnvcaller/readcounts/sample1.hdf5", + "germlinecnvcaller/readcounts/sample2.hdf5", + "germlinecnvcaller/references", + "germlinecnvcaller/references/genome.dict", + "germlinecnvcaller/references/genome.fasta.fai", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml" + ], + [ + "contig_ploidy_prior.tsv:md5,7a2f5444b09a1f635a540bbcd23176cf", + "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", + "interval_list.tsv:md5,8c5aaf57cf34ff35b183178a87a9f864", + "mu_mean_bias_j_lowerbound__.tsv:md5,76bfcb151ebf3c97cf9e3499891eb652", + "mu_psi_j_log__.tsv:md5,82bd39a4dc5395c07522f2c24f7b2a0e", + "ploidy_config.json:md5,4cdc16109826fa7d3cdfd1dc8758ec27", + "std_mean_bias_j_lowerbound__.tsv:md5,9b31e5eecfd8ec379bc7ff126d599ab9", + "std_psi_j_log__.tsv:md5,2f77d2060b0d8adc3d0f486b0ad2985a", + "baseline_copy_number_t.tsv:md5,a8f848d75d241f7932ffdca261bffe7b", + "log_c_emission_tc.tsv:md5,0748bf2a65d30bf05ce3369cb10717ce", + "log_q_c_tc.tsv:md5,7b59a0e6d19b12f2eff4d297c629bb7f", + "mu_denoised_copy_ratio_t.tsv:md5,3f7093217a4b1608860b3e644a33fefe", + "mu_psi_s_log__.tsv:md5,16d70d8f81be635c23bf5c5b3c9fac95", + "mu_read_depth_s_log__.tsv:md5,8d333418ef1c0f6000d8cd1d5991bf0a", + "mu_z_su.tsv:md5,c87b85c2d5ec774344c3b134b5a92b39", + "sample_name.txt:md5,5cf41871eea16c0fbfdcb7f0f664ed95", + "std_denoised_copy_ratio_t.tsv:md5,0e413c4a5ef8ae38b3aae3546e6ca13f", + "std_psi_s_log__.tsv:md5,a8d6bfb303daf3159d8ca4d8b3308ef4", + "std_read_depth_s_log__.tsv:md5,4b8d04c940be597eca6e9440d83426af", + 
"std_z_su.tsv:md5,97141ce78b363f8c749f9dabaa8f71d1", + "baseline_copy_number_t.tsv:md5,51670372d61cf525a2303d293129fc2e", + "log_c_emission_tc.tsv:md5,7f3d964fabec816b66612fff5b5c5aa2", + "log_q_c_tc.tsv:md5,bc179abdd6232d51ad157df350e3f5fd", + "mu_denoised_copy_ratio_t.tsv:md5,690a41c02d96d9d7a9be1a47d5fd7369", + "mu_psi_s_log__.tsv:md5,f8d624a7831fd0945ae183145947a846", + "mu_read_depth_s_log__.tsv:md5,80432c22763a397ad25282f7d72b0fcd", + "mu_z_su.tsv:md5,fa3dab26a0e25ceb114290e1532fbc60", + "sample_name.txt:md5,640a474d6f73b01524287f50ca538c04", + "std_denoised_copy_ratio_t.tsv:md5,bed016c512813ca0ce58965acc59500e", + "std_psi_s_log__.tsv:md5,9e8d00a4c9abd6d3b745e92dd113fea5", + "std_read_depth_s_log__.tsv:md5,0a911950d0e2382dad7a487b59c4478b", + "std_z_su.tsv:md5,6de492e11cf75d68db013adc708dec41", + "calling_config.json:md5,dc78889e3d0ba1adf02d10db0194128e", + "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", + "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", + "interval_list.tsv:md5,0cba6efc5fa465f7930c901916a3e764", + "calling_config.json:md5,dc78889e3d0ba1adf02d10db0194128e", + "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", + "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", + "interval_list.tsv:md5,0cba6efc5fa465f7930c901916a3e764", + "log_q_tau_tk.tsv:md5,10f1fbf986d3b208609f324b332971a8", + "mu_W_tu.tsv:md5,98109ec936c1db20c4c3755d3a7badfe", + "mu_ard_u_log__.tsv:md5,f1b230c7a55edf9d7a843dfe6dac0215", + "mu_log_mean_bias_t.tsv:md5,75ae725922f6091436577891cf0f9f16", + "mu_psi_t_log__.tsv:md5,fa680d610797e70e6570c0ff0fc8571c", + "std_W_tu.tsv:md5,6352d533546632d1d9315fbe758f53a8", + "std_ard_u_log__.tsv:md5,1af61ffd2028816fc1965351039db795", + "std_log_mean_bias_t.tsv:md5,22fc650dd573543afa303f0bb3d2318c", + "std_psi_t_log__.tsv:md5,bcd1b4e7906035a752ad96eb797f296b", + "baseline_copy_number_t.tsv:md5,a8f848d75d241f7932ffdca261bffe7b", + "log_c_emission_tc.tsv:md5,19616a39502b839c638ceb653d31f041", + 
"log_q_c_tc.tsv:md5,02f6ad2cc53e1eddc8195d55bb1a126a", + "mu_denoised_copy_ratio_t.tsv:md5,ccbfb670b45a8ddeaee4ee3d551571c7", + "mu_psi_s_log__.tsv:md5,d21c650d92b4888ce3aaa55e5a1291d4", + "mu_read_depth_s_log__.tsv:md5,b052485317c02183d43fcc6754ef5beb", + "mu_z_su.tsv:md5,43669475085e3197e1ded507eb34b536", + "sample_name.txt:md5,5cf41871eea16c0fbfdcb7f0f664ed95", + "std_denoised_copy_ratio_t.tsv:md5,4665c3671b0d5d6b198285c7a8744e61", + "std_psi_s_log__.tsv:md5,95e09044cf38a3e6b89646564ac0d247", + "std_read_depth_s_log__.tsv:md5,82b422a2a5f91fdb5d97395bfeb5d8e2", + "std_z_su.tsv:md5,1974f65e053ad05910f2ec8bb21d2bcb", + "baseline_copy_number_t.tsv:md5,51670372d61cf525a2303d293129fc2e", + "log_c_emission_tc.tsv:md5,2bc684d5e2dbd9b4bce79ed5635ab6af", + "log_q_c_tc.tsv:md5,a6ef596969e5e89566f2924e96e86951", + "mu_denoised_copy_ratio_t.tsv:md5,6ff4d7eb72ce770cdff2b36845e2a0d3", + "mu_psi_s_log__.tsv:md5,47d38b45a67590f18bc2aa99788fbbfb", + "mu_read_depth_s_log__.tsv:md5,e38ad1da0efd3780ae04cd3f2dcfe336", + "mu_z_su.tsv:md5,38469a673b1d4adb3b7aea55495667e6", + "sample_name.txt:md5,640a474d6f73b01524287f50ca538c04", + "std_denoised_copy_ratio_t.tsv:md5,e1cce7501fe552868c6b8ed69158147f", + "std_psi_s_log__.tsv:md5,3908edbdf73919c1908451f7450a9c85", + "std_read_depth_s_log__.tsv:md5,06a964068988148b856079df77dfeff9", + "std_z_su.tsv:md5,fd1fa6c54f02bdca72b928ddefeb088b", + "calling_config.json:md5,dc78889e3d0ba1adf02d10db0194128e", + "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", + "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", + "interval_list.tsv:md5,9f7718372e87e69aeb837678755a9971", + "calling_config.json:md5,dc78889e3d0ba1adf02d10db0194128e", + "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", + "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", + "interval_list.tsv:md5,9f7718372e87e69aeb837678755a9971", + "log_q_tau_tk.tsv:md5,e14fa02f301cfb42d058d95a9a438aad", + 
"mu_W_tu.tsv:md5,fdd960e9dc1ef0de49546f7bc9862557", + "mu_ard_u_log__.tsv:md5,4e653386ceff2108340d6d064082934c", + "mu_log_mean_bias_t.tsv:md5,5b27383c83d3498b93a2253c3bc53432", + "mu_psi_t_log__.tsv:md5,e842bda2a1fd39205fcb9298ace5bd0c", + "std_W_tu.tsv:md5,65aa1894391567b595ee49f0d4321190", + "std_ard_u_log__.tsv:md5,bb973bd96eb1ebd41020cb6d2ae98603", + "std_log_mean_bias_t.tsv:md5,32b2081e95c279556c9831bdc1e49631", + "std_psi_t_log__.tsv:md5,2eb3de95440b579319e2556767f8404d", + "sample1.hdf5:md5,59e1b75cc5cbf0225000fc9ddbd4ce22", + "sample2.hdf5:md5,8dc7e1a1e6f6134612d72eef1a195a7e", + "genome.dict:md5,5e66d0ced1424dc24a4c01e98b39ece5", + "genome.fasta.fai:md5,3520cd30e1b100e55f578db9c855f685", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-11T14:40:54.111307" + } +} \ No newline at end of file From db922caddb0dd138e45258a5cdfcc60313597ad0 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Oct 2024 15:28:00 +0200 Subject: [PATCH 134/234] fix gens snapshots --- tests/.nftignore | 3 +++ tests/gens_pon.nf.test.snap | 5 +---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/.nftignore b/tests/.nftignore index e5170cd..9414705 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -1,6 +1,9 @@ gatk4/*.{vcf.gz,vcf.gz.tbi} gatk4/test/* gatk4/test/** +gens_pon/createreadcountpanelofnormals/gens_pon.hdf5 +gens_pon/intervals/genome.interval_list +gens_pon/references/genome.dict multiqc/multiqc_data/multiqc.log multiqc/multiqc_data/multiqc_data.json multiqc/multiqc_data/multiqc_general_stats.txt diff --git a/tests/gens_pon.nf.test.snap b/tests/gens_pon.nf.test.snap index 9e1a3f0..f1364d8 100644 --- a/tests/gens_pon.nf.test.snap +++ b/tests/gens_pon.nf.test.snap @@ -46,11 +46,8 @@ "pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml" ], [ - "gens_pon.hdf5:md5,31077d93155c1ffb0ba1904b5056386a", - 
"genome.interval_list:md5,aa0fc11d901dd9b3bbfd788f83d31800", "sample1.tsv:md5,bfceab35109b04317f38f9cc1cde71ca", "sample2.tsv:md5,7141d08cdc26f6057557be9e23ef4365", - "genome.dict:md5,07f6990366eb7835eeecae174ad3db6a", "genome.fasta.fai:md5,3520cd30e1b100e55f578db9c855f685", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] @@ -61,4 +58,4 @@ }, "timestamp": "2024-10-11T14:12:35.334538" } -} \ No newline at end of file +} From 316dabf0c2f455644f6203381b1acc772d1e2810 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Oct 2024 15:42:35 +0200 Subject: [PATCH 135/234] fix GHA --- .../workflows/template_version_comment.yml | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/.github/workflows/template_version_comment.yml b/.github/workflows/template_version_comment.yml index 9dea41f..d67ee08 100644 --- a/.github/workflows/template_version_comment.yml +++ b/.github/workflows/template_version_comment.yml @@ -10,9 +10,11 @@ jobs: steps: - name: Check out pipeline code uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + with: + ref: ${{ github.event.pull_request.head.sha }} - name: Read template version from .nf-core.yml - uses: pietrobolcato/action-read-yaml@1.0.0 + uses: nichmor/minimal-read-yaml@v0.0.2 id: read_yml with: config: ${{ github.workspace }}/.nf-core.yml @@ -21,23 +23,23 @@ jobs: run: | python -m pip install --upgrade pip pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} - - name: Check nf-core outdated id: nf_core_outdated - run: pip list --outdated | grep nf-core + run: echo "OUTPUT=$(pip list --outdated | grep nf-core)" >> ${GITHUB_ENV} - name: Post nf-core template version comment uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 if: | - ${{ steps.nf_core_outdated.outputs.stdout }} =~ 'nf-core' + contains(env.OUTPUT, 'nf-core') with: repo-token: ${{ secrets.NF_CORE_BOT_AUTH_TOKEN }} allow-repeats: false message: | - ## :warning: Newer version 
of the nf-core template is available. - - Your pipeline is using an old version of the nf-core template: ${{ steps.read_yml.outputs['nf_core_version'] }}. - Please update your pipeline to the latest version. - - For more documentation on how to update your pipeline, please see the [nf-core documentation](https://github.com/nf-core/tools?tab=readme-ov-file#sync-a-pipeline-with-the-template) and [Synchronisation documentation](https://nf-co.re/docs/contributing/sync). + > [!WARNING] + > Newer version of the nf-core template is available. + > + > Your pipeline is using an old version of the nf-core template: ${{ steps.read_yml.outputs['nf_core_version'] }}. + > Please update your pipeline to the latest version. + > + > For more documentation on how to update your pipeline, please see the [nf-core documentation](https://github.com/nf-core/tools?tab=readme-ov-file#sync-a-pipeline-with-the-template) and [Synchronisation documentation](https://nf-co.re/docs/contributing/sync). # From 232dcf4027d52a9421b3698417957052537ee8f8 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Oct 2024 16:07:07 +0200 Subject: [PATCH 136/234] fix germlinecnvcaller snapshots --- tests/.nftignore | 25 +++++++- tests/germlinecnvcaller_cohort.nf.test.snap | 65 +-------------------- 2 files changed, 25 insertions(+), 65 deletions(-) diff --git a/tests/.nftignore b/tests/.nftignore index 9414705..25d24c4 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -1,9 +1,32 @@ +*/references/genome.dict gatk4/*.{vcf.gz,vcf.gz.tbi} gatk4/test/* gatk4/test/** gens_pon/createreadcountpanelofnormals/gens_pon.hdf5 gens_pon/intervals/genome.interval_list -gens_pon/references/genome.dict +germlinecnvcaller/determinegermlinecontigploidy/cohort-model/mu_psi_j_log__.tsv +germlinecnvcaller/determinegermlinecontigploidy/cohort-model/std_psi_j_log__.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/log_c_emission_tc.tsv 
+germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/log_q_c_tc.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/mu_denoised_copy_ratio_t.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/mu_psi_s_log__.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/mu_read_depth_s_log__.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/mu_z_su.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/std_denoised_copy_ratio_t.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/std_psi_s_log__.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/std_read_depth_s_log__.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/std_z_su.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/log_q_tau_tk.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/mu_W_tu.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/mu_ard_u_log__.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/mu_log_mean_bias_t.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/mu_psi_t_log__.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/std_W_tu.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/std_ard_u_log__.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/std_log_mean_bias_t.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/std_psi_t_log__.tsv +germlinecnvcaller/readcounts/*.hdf5 
multiqc/multiqc_data/multiqc.log multiqc/multiqc_data/multiqc_data.json multiqc/multiqc_data/multiqc_general_stats.txt diff --git a/tests/germlinecnvcaller_cohort.nf.test.snap b/tests/germlinecnvcaller_cohort.nf.test.snap index 040224d..4187f37 100644 --- a/tests/germlinecnvcaller_cohort.nf.test.snap +++ b/tests/germlinecnvcaller_cohort.nf.test.snap @@ -161,34 +161,12 @@ "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", "interval_list.tsv:md5,8c5aaf57cf34ff35b183178a87a9f864", "mu_mean_bias_j_lowerbound__.tsv:md5,76bfcb151ebf3c97cf9e3499891eb652", - "mu_psi_j_log__.tsv:md5,82bd39a4dc5395c07522f2c24f7b2a0e", "ploidy_config.json:md5,4cdc16109826fa7d3cdfd1dc8758ec27", "std_mean_bias_j_lowerbound__.tsv:md5,9b31e5eecfd8ec379bc7ff126d599ab9", - "std_psi_j_log__.tsv:md5,2f77d2060b0d8adc3d0f486b0ad2985a", "baseline_copy_number_t.tsv:md5,a8f848d75d241f7932ffdca261bffe7b", - "log_c_emission_tc.tsv:md5,0748bf2a65d30bf05ce3369cb10717ce", - "log_q_c_tc.tsv:md5,7b59a0e6d19b12f2eff4d297c629bb7f", - "mu_denoised_copy_ratio_t.tsv:md5,3f7093217a4b1608860b3e644a33fefe", - "mu_psi_s_log__.tsv:md5,16d70d8f81be635c23bf5c5b3c9fac95", - "mu_read_depth_s_log__.tsv:md5,8d333418ef1c0f6000d8cd1d5991bf0a", - "mu_z_su.tsv:md5,c87b85c2d5ec774344c3b134b5a92b39", "sample_name.txt:md5,5cf41871eea16c0fbfdcb7f0f664ed95", - "std_denoised_copy_ratio_t.tsv:md5,0e413c4a5ef8ae38b3aae3546e6ca13f", - "std_psi_s_log__.tsv:md5,a8d6bfb303daf3159d8ca4d8b3308ef4", - "std_read_depth_s_log__.tsv:md5,4b8d04c940be597eca6e9440d83426af", - "std_z_su.tsv:md5,97141ce78b363f8c749f9dabaa8f71d1", "baseline_copy_number_t.tsv:md5,51670372d61cf525a2303d293129fc2e", - "log_c_emission_tc.tsv:md5,7f3d964fabec816b66612fff5b5c5aa2", - "log_q_c_tc.tsv:md5,bc179abdd6232d51ad157df350e3f5fd", - "mu_denoised_copy_ratio_t.tsv:md5,690a41c02d96d9d7a9be1a47d5fd7369", - "mu_psi_s_log__.tsv:md5,f8d624a7831fd0945ae183145947a846", - "mu_read_depth_s_log__.tsv:md5,80432c22763a397ad25282f7d72b0fcd", - 
"mu_z_su.tsv:md5,fa3dab26a0e25ceb114290e1532fbc60", "sample_name.txt:md5,640a474d6f73b01524287f50ca538c04", - "std_denoised_copy_ratio_t.tsv:md5,bed016c512813ca0ce58965acc59500e", - "std_psi_s_log__.tsv:md5,9e8d00a4c9abd6d3b745e92dd113fea5", - "std_read_depth_s_log__.tsv:md5,0a911950d0e2382dad7a487b59c4478b", - "std_z_su.tsv:md5,6de492e11cf75d68db013adc708dec41", "calling_config.json:md5,dc78889e3d0ba1adf02d10db0194128e", "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", @@ -197,39 +175,10 @@ "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", "interval_list.tsv:md5,0cba6efc5fa465f7930c901916a3e764", - "log_q_tau_tk.tsv:md5,10f1fbf986d3b208609f324b332971a8", - "mu_W_tu.tsv:md5,98109ec936c1db20c4c3755d3a7badfe", - "mu_ard_u_log__.tsv:md5,f1b230c7a55edf9d7a843dfe6dac0215", - "mu_log_mean_bias_t.tsv:md5,75ae725922f6091436577891cf0f9f16", - "mu_psi_t_log__.tsv:md5,fa680d610797e70e6570c0ff0fc8571c", - "std_W_tu.tsv:md5,6352d533546632d1d9315fbe758f53a8", - "std_ard_u_log__.tsv:md5,1af61ffd2028816fc1965351039db795", - "std_log_mean_bias_t.tsv:md5,22fc650dd573543afa303f0bb3d2318c", - "std_psi_t_log__.tsv:md5,bcd1b4e7906035a752ad96eb797f296b", "baseline_copy_number_t.tsv:md5,a8f848d75d241f7932ffdca261bffe7b", - "log_c_emission_tc.tsv:md5,19616a39502b839c638ceb653d31f041", - "log_q_c_tc.tsv:md5,02f6ad2cc53e1eddc8195d55bb1a126a", - "mu_denoised_copy_ratio_t.tsv:md5,ccbfb670b45a8ddeaee4ee3d551571c7", - "mu_psi_s_log__.tsv:md5,d21c650d92b4888ce3aaa55e5a1291d4", - "mu_read_depth_s_log__.tsv:md5,b052485317c02183d43fcc6754ef5beb", - "mu_z_su.tsv:md5,43669475085e3197e1ded507eb34b536", "sample_name.txt:md5,5cf41871eea16c0fbfdcb7f0f664ed95", - "std_denoised_copy_ratio_t.tsv:md5,4665c3671b0d5d6b198285c7a8744e61", - "std_psi_s_log__.tsv:md5,95e09044cf38a3e6b89646564ac0d247", - 
"std_read_depth_s_log__.tsv:md5,82b422a2a5f91fdb5d97395bfeb5d8e2", - "std_z_su.tsv:md5,1974f65e053ad05910f2ec8bb21d2bcb", "baseline_copy_number_t.tsv:md5,51670372d61cf525a2303d293129fc2e", - "log_c_emission_tc.tsv:md5,2bc684d5e2dbd9b4bce79ed5635ab6af", - "log_q_c_tc.tsv:md5,a6ef596969e5e89566f2924e96e86951", - "mu_denoised_copy_ratio_t.tsv:md5,6ff4d7eb72ce770cdff2b36845e2a0d3", - "mu_psi_s_log__.tsv:md5,47d38b45a67590f18bc2aa99788fbbfb", - "mu_read_depth_s_log__.tsv:md5,e38ad1da0efd3780ae04cd3f2dcfe336", - "mu_z_su.tsv:md5,38469a673b1d4adb3b7aea55495667e6", "sample_name.txt:md5,640a474d6f73b01524287f50ca538c04", - "std_denoised_copy_ratio_t.tsv:md5,e1cce7501fe552868c6b8ed69158147f", - "std_psi_s_log__.tsv:md5,3908edbdf73919c1908451f7450a9c85", - "std_read_depth_s_log__.tsv:md5,06a964068988148b856079df77dfeff9", - "std_z_su.tsv:md5,fd1fa6c54f02bdca72b928ddefeb088b", "calling_config.json:md5,dc78889e3d0ba1adf02d10db0194128e", "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", @@ -238,18 +187,6 @@ "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", "interval_list.tsv:md5,9f7718372e87e69aeb837678755a9971", - "log_q_tau_tk.tsv:md5,e14fa02f301cfb42d058d95a9a438aad", - "mu_W_tu.tsv:md5,fdd960e9dc1ef0de49546f7bc9862557", - "mu_ard_u_log__.tsv:md5,4e653386ceff2108340d6d064082934c", - "mu_log_mean_bias_t.tsv:md5,5b27383c83d3498b93a2253c3bc53432", - "mu_psi_t_log__.tsv:md5,e842bda2a1fd39205fcb9298ace5bd0c", - "std_W_tu.tsv:md5,65aa1894391567b595ee49f0d4321190", - "std_ard_u_log__.tsv:md5,bb973bd96eb1ebd41020cb6d2ae98603", - "std_log_mean_bias_t.tsv:md5,32b2081e95c279556c9831bdc1e49631", - "std_psi_t_log__.tsv:md5,2eb3de95440b579319e2556767f8404d", - "sample1.hdf5:md5,59e1b75cc5cbf0225000fc9ddbd4ce22", - "sample2.hdf5:md5,8dc7e1a1e6f6134612d72eef1a195a7e", - "genome.dict:md5,5e66d0ced1424dc24a4c01e98b39ece5", 
"genome.fasta.fai:md5,3520cd30e1b100e55f578db9c855f685", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] @@ -258,6 +195,6 @@ "nf-test": "0.9.0", "nextflow": "24.09.0" }, - "timestamp": "2024-10-11T14:40:54.111307" + "timestamp": "2024-10-11T16:00:18.603619" } } \ No newline at end of file From c4b9065f109b602925422f0885ff5e65832193a8 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Oct 2024 16:28:48 +0200 Subject: [PATCH 137/234] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bc09d2d..c4ba09f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#24](https://github.com/nf-core/createpanelrefs/pull/24) - Updates germlinecnvcaller and gens subworkflows to use custom names for panel of normals. - [#28](https://github.com/nf-core/createpanelrefs/pull/28) - Updates default args for gens subworkflow and made the parameters available from the command line. - [#31](https://github.com/nf-core/createpanelrefs/pull/31) - Publish interval_list file from gens subworkflow by default. 
+- [#35](https://github.com/nf-core/createpanelrefs/pull/35) - Template update for nf-core/tools v3.0.2 ### `Fixed` From db89b8547f62447995aad2f15c337a2a017d9fd8 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Oct 2024 16:36:39 +0200 Subject: [PATCH 138/234] leftover merge conflicts --- .../local/utils_nfcore_createpanelrefs_pipeline/main.nf | 8 -------- 1 file changed, 8 deletions(-) diff --git a/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf b/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf index bfced5c..c575a10 100644 --- a/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf @@ -47,10 +47,6 @@ workflow PIPELINE_INITIALISATION { workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 ) -<<<<<<< HEAD -======= - ->>>>>>> upstream/nf-core-template-merge-3.0.1 // // Validate parameters and generate parameter summary to stdout // @@ -59,10 +55,6 @@ workflow PIPELINE_INITIALISATION { validate_params, null ) -<<<<<<< HEAD -======= - ->>>>>>> upstream/nf-core-template-merge-3.0.1 // // Check config provided to the pipeline From 5db95904ca0c2c41239766dcf229c5a00adccaa6 Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Fri, 11 Oct 2024 17:03:33 +0200 Subject: [PATCH 139/234] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4ba09f..f819c1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#28](https://github.com/nf-core/createpanelrefs/pull/28) - Updates default args for gens subworkflow and made the parameters available from the command line. - [#31](https://github.com/nf-core/createpanelrefs/pull/31) - Publish interval_list file from gens subworkflow by default. 
- [#35](https://github.com/nf-core/createpanelrefs/pull/35) - Template update for nf-core/tools v3.0.2 +- [#35](https://github.com/nf-core/createpanelrefs/pull/35) - Improve pipeline level tests ### `Fixed` From 50259edd1f90b08dbef2534338d523ac21a548ad Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Fri, 11 Oct 2024 18:02:35 +0200 Subject: [PATCH 140/234] Update workflows/createpanelrefs.nf Co-authored-by: Louis LE NEZET <58640615+LouisLeNezet@users.noreply.github.com> --- workflows/createpanelrefs.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 2ceee98..04e986b 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -208,7 +208,8 @@ workflow CREATEPANELREFS { [] ) - emit:multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } From 85076d28c6d642ebabe8bf0bcdf7d18e37070fa5 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 4 Nov 2024 19:06:19 +0100 Subject: [PATCH 141/234] update modules --- modules.json | 32 +-- modules/nf-core/cnvkit/batch/environment.yml | 7 +- modules/nf-core/cnvkit/batch/meta.yml | 191 +++++++------ .../nf-core/cnvkit/batch/tests/main.nf.test | 105 +++---- .../cnvkit/batch/tests/main.nf.test.snap | 87 ++++-- .../gatk4/annotateintervals/environment.yml | 2 - .../nf-core/gatk4/annotateintervals/meta.yml | 185 ++++++------ .../annotateintervals/tests/main.nf.test | 58 ++-- .../gatk4/bedtointervallist/environment.yml | 2 - .../nf-core/gatk4/bedtointervallist/meta.yml | 59 ++-- .../gatk4/collectreadcounts/environment.yml | 2 - .../nf-core/gatk4/collectreadcounts/meta.yml | 136 +++++---- .../collectreadcounts/tests/main.nf.test | 104 +++++++ .../collectreadcounts/tests/main.nf.test.snap | 97 
+++++++ .../tests/nextflow.cram.config | 5 + .../tests/nextflow.hdf5.config | 5 + .../tests/nextflow.tsv.config | 5 + .../environment.yml | 2 - .../createreadcountpanelofnormals/meta.yml | 48 ++-- .../environment.yml | 2 - .../createsomaticpanelofnormals/meta.yml | 104 ++++--- .../determinegermlinecontigploidy/main.nf | 2 + .../determinegermlinecontigploidy/meta.yml | 112 ++++---- .../gatk4/filterintervals/environment.yml | 2 - .../nf-core/gatk4/filterintervals/meta.yml | 90 +++--- .../gatk4/filterintervals/tests/main.nf.test | 38 +++ .../filterintervals/tests/main.nf.test.snap | 35 +++ .../filterintervals/tests/nextflow.config | 5 + .../gatk4/genomicsdbimport/environment.yml | 2 - .../nf-core/gatk4/genomicsdbimport/meta.yml | 139 +++++---- .../gatk4/genomicsdbimport/tests/main.nf.test | 41 +-- .../genomicsdbimport/tests/main.nf.test.snap | 92 ++++-- .../nf-core/gatk4/germlinecnvcaller/main.nf | 2 + .../nf-core/gatk4/germlinecnvcaller/meta.yml | 107 ++++--- .../gatk4/indexfeaturefile/environment.yml | 2 - .../nf-core/gatk4/indexfeaturefile/meta.yml | 43 +-- .../gatk4/indexfeaturefile/tests/main.nf.test | 8 +- .../gatk4/intervallisttools/environment.yml | 2 - .../nf-core/gatk4/intervallisttools/meta.yml | 51 ++-- .../gatk4/preprocessintervals/environment.yml | 2 - .../gatk4/preprocessintervals/meta.yml | 128 +++++---- .../preprocessintervals/tests/main.nf.test | 49 ++++ .../tests/main.nf.test.snap | 35 +++ .../preprocessintervals/tests/nextflow.config | 5 + .../createsequencedictionary/environment.yml | 4 +- .../picard/createsequencedictionary/main.nf | 16 +- .../picard/createsequencedictionary/meta.yml | 45 +-- .../tests/main.nf.test | 61 ++++ .../tests/main.nf.test.snap | 48 ++++ .../nf-core/samtools/faidx/environment.yml | 10 +- modules/nf-core/samtools/faidx/main.nf | 4 +- modules/nf-core/samtools/faidx/meta.yml | 85 +++--- .../samtools/faidx/tests/main.nf.test.snap | 50 ++-- .../nf-core/samtools/index/environment.yml | 8 +- 
modules/nf-core/samtools/index/main.nf | 11 +- modules/nf-core/samtools/index/meta.yml | 68 +++-- .../nf-core/samtools/index/tests/main.nf.test | 87 ++++-- .../samtools/index/tests/main.nf.test.snap | 264 +++++++++++++++--- 58 files changed, 2015 insertions(+), 976 deletions(-) create mode 100644 modules/nf-core/gatk4/collectreadcounts/tests/main.nf.test create mode 100644 modules/nf-core/gatk4/collectreadcounts/tests/main.nf.test.snap create mode 100644 modules/nf-core/gatk4/collectreadcounts/tests/nextflow.cram.config create mode 100644 modules/nf-core/gatk4/collectreadcounts/tests/nextflow.hdf5.config create mode 100644 modules/nf-core/gatk4/collectreadcounts/tests/nextflow.tsv.config create mode 100644 modules/nf-core/gatk4/filterintervals/tests/main.nf.test create mode 100644 modules/nf-core/gatk4/filterintervals/tests/main.nf.test.snap create mode 100644 modules/nf-core/gatk4/filterintervals/tests/nextflow.config create mode 100644 modules/nf-core/gatk4/preprocessintervals/tests/main.nf.test create mode 100644 modules/nf-core/gatk4/preprocessintervals/tests/main.nf.test.snap create mode 100644 modules/nf-core/gatk4/preprocessintervals/tests/nextflow.config create mode 100644 modules/nf-core/picard/createsequencedictionary/tests/main.nf.test create mode 100644 modules/nf-core/picard/createsequencedictionary/tests/main.nf.test.snap diff --git a/modules.json b/modules.json index fc72254..143d68a 100644 --- a/modules.json +++ b/modules.json @@ -7,62 +7,62 @@ "nf-core": { "cnvkit/batch": { "branch": "master", - "git_sha": "f53b071421340e6fac0806c86ba030e578e94826", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "gatk4/annotateintervals": { "branch": "master", - "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "gatk4/bedtointervallist": { "branch": "master", - "git_sha": "d3f215802f696f7993f25c759781d2db91232015", + "git_sha": 
"666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "gatk4/collectreadcounts": { "branch": "master", - "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "gatk4/createreadcountpanelofnormals": { "branch": "master", - "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "gatk4/createsomaticpanelofnormals": { "branch": "master", - "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["bam_create_som_pon_gatk"] }, "gatk4/determinegermlinecontigploidy": { "branch": "master", - "git_sha": "60a7dbae179bcfa24c10294cc9a07423a239c19a", + "git_sha": "d6dd4ddea1663edbf15f40e089cc16db96e75f1d", "installed_by": ["modules"] }, "gatk4/filterintervals": { "branch": "master", - "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "gatk4/genomicsdbimport": { "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["bam_create_som_pon_gatk"] }, "gatk4/germlinecnvcaller": { "branch": "master", - "git_sha": "cf607b7749da0a8f5ca2a1e31233e13e3159e2fe", + "git_sha": "d6dd4ddea1663edbf15f40e089cc16db96e75f1d", "installed_by": ["modules"] }, "gatk4/indexfeaturefile": { "branch": "master", - "git_sha": "108e1f5bafc045ac19890852a41e8d95ae12aa67", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "gatk4/intervallisttools": { "branch": "master", - "git_sha": "afb604624abcc2fc67a43d70a2de369a50d16105", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "gatk4/mutect2": { @@ -72,7 +72,7 @@ }, "gatk4/preprocessintervals": { "branch": "master", - "git_sha": 
"d742e3143f2ccb8853c29b35cfcf50b5e5026980", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "multiqc": { @@ -82,17 +82,17 @@ }, "picard/createsequencedictionary": { "branch": "master", - "git_sha": "20b0918591d4ba20047d7e13e5094bcceba81447", + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", - "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", "installed_by": ["modules"] }, "samtools/index": { "branch": "master", - "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", "installed_by": ["modules"] } } diff --git a/modules/nf-core/cnvkit/batch/environment.yml b/modules/nf-core/cnvkit/batch/environment.yml index 10c5d6b..5d79360 100644 --- a/modules/nf-core/cnvkit/batch/environment.yml +++ b/modules/nf-core/cnvkit/batch/environment.yml @@ -1,11 +1,8 @@ -name: cnvkit_batch - channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::cnvkit=0.9.10 - - bioconda::htslib=1.19.1 - - bioconda::samtools=1.19.2 + - bioconda::htslib=1.17 # Matched with the container + - bioconda::samtools=1.17 # Matched with the container diff --git a/modules/nf-core/cnvkit/batch/meta.yml b/modules/nf-core/cnvkit/batch/meta.yml index f14efe5..30f7a1a 100644 --- a/modules/nf-core/cnvkit/batch/meta.yml +++ b/modules/nf-core/cnvkit/batch/meta.yml @@ -12,94 +12,127 @@ tools: homepage: https://cnvkit.readthedocs.io/en/stable/index.html documentation: https://cnvkit.readthedocs.io/en/stable/index.html licence: ["Apache-2.0"] + identifier: biotools:cnvkit input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - tumor: - type: file - description: | - Input tumour sample bam file (or cram) - - normal: - type: file - description: | - Input normal sample bam file (or cram) - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - fasta: - type: file - description: | - Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided) - - meta3: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - fasta_fai: - type: file - description: | - Input reference genome fasta index (optional, but recommended for cram_input) - - meta4: - type: map - description: | - Groovy Map containing information about target file - e.g. [ id:'test' ] - - targets: - type: file - description: | - Input target bed file - - meta5: - type: map - description: | - Groovy Map containing information about reference file - e.g. [ id:'test' ] - - reference: - type: file - description: | - Input reference cnn-file (only for germline and tumor-only running) - - panel_of_normals: - type: file - description: | - Input panel of normals file + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tumor: + type: file + description: | + Input tumour sample bam file (or cram) + - normal: + type: file + description: | + Input normal sample bam file (or cram) + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: | + Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided) + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - fasta_fai: + type: file + description: | + Input reference genome fasta index (optional, but recommended for cram_input) + - - meta4: + type: map + description: | + Groovy Map containing information about target file + e.g. [ id:'test' ] + - targets: + type: file + description: | + Input target bed file + - - meta5: + type: map + description: | + Groovy Map containing information about reference file + e.g. [ id:'test' ] + - reference: + type: file + description: | + Input reference cnn-file (only for germline and tumor-only running) + - - panel_of_normals: + type: file + description: | + Input panel of normals file output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - bed: - type: file - description: File containing genomic regions - pattern: "*.{bed}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bed": + type: file + description: File containing genomic regions + pattern: "*.{bed}" - cnn: - type: file - description: File containing coverage information - pattern: "*.{cnn}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cnn": + type: file + description: File containing coverage information + pattern: "*.{cnn}" - cnr: - type: file - description: File containing copy number ratio information - pattern: "*.{cnr}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cnr": + type: file + description: File containing copy number ratio information + pattern: "*.{cnr}" - cns: - type: file - description: File containing copy number segment information - pattern: "*.{cns}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.cns": + type: file + description: File containing copy number segment information + pattern: "*.{cns}" - pdf: - type: file - description: File with plot of copy numbers or segments on chromosomes - pattern: "*.{pdf}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.pdf": + type: file + description: File with plot of copy numbers or segments on chromosomes + pattern: "*.{pdf}" - png: - type: file - description: File with plot of bin-level log2 coverages and segmentation calls - pattern: "*.{png}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.png": + type: file + description: File with plot of bin-level log2 coverages and segmentation calls + pattern: "*.{png}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@adamrtalbot" - "@drpatelh" diff --git a/modules/nf-core/cnvkit/batch/tests/main.nf.test b/modules/nf-core/cnvkit/batch/tests/main.nf.test index b2c0a9b..f191a4b 100644 --- a/modules/nf-core/cnvkit/batch/tests/main.nf.test +++ b/modules/nf-core/cnvkit/batch/tests/main.nf.test @@ -18,12 +18,12 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) - ] - input[1] = [[:],file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: 
true) + ] + input[1] = [[:],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] input[2] = [[:],[]] - input[3] = [[:],file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true)] + input[3] = [[:],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/baits.bed', checkIfExists: true)] input[4] = [[:],[]] input[5] = false """ @@ -34,7 +34,7 @@ nextflow_process { println process.out.bed[0][1] assertAll( { assert process.success }, - { assert snapshot(process.out.version).match() } + { assert snapshot(process.out.versions).match() } ) } @@ -49,10 +49,10 @@ nextflow_process { """ input[0] = [ [ id:'test'], // meta map - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) - ] - input[1] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = [[:],file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)] input[2] = [[:],[]] input[3] = [[:],[]] input[4] = [[:],[]] @@ -64,7 +64,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.version).match() } + { assert snapshot(process.out.versions).match() } ) } @@ -79,11 +79,11 @@ nextflow_process { """ input[0] = [ [ id:'test'], // meta map - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) - ] - input[1] = 
[[:],file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] - input[2] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)] + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ] + input[1] = [[:],file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)] + input[2] = [[:],file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)] input[3] = [[:],[]] input[4] = [[:],[]] input[5] = false @@ -94,7 +94,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.version).match() } + { assert snapshot(process.out.versions).match() } ) } @@ -109,13 +109,13 @@ nextflow_process { """ input[0] = [ [ id:'test'], // meta map - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true), [] - ] + ] input[1] = [[:],[]] input[2] = [[:],[]] input[3] = [[:],[]] - input[4] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true)] + input[4] = [[:],file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/reference_chr21.cnn', checkIfExists: true)] input[5] = false """ } @@ -124,7 +124,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.version).match() } + { assert snapshot(process.out.versions).match() } ) } @@ -139,13 +139,13 @@ nextflow_process { """ input[0] = [ [ id:'test'], // meta map - 
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram', checkIfExists: true), [] - ] - input[1] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ] + input[1] = [[:],file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)] input[2] = [[:],[]] input[3] = [[:],[]] - input[4] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true)] + input[4] = [[:],file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/reference_chr21.cnn', checkIfExists: true)] input[5] = false """ } @@ -154,7 +154,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.version).match() } + { assert snapshot(process.out.versions).match() } ) } @@ -168,11 +168,11 @@ nextflow_process { input[0] = [ [ id:'test'], // meta map [], - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true) - ] - input[1] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)] - input[2] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)] - input[3] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true)] + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ] + input[1] = [[:],file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)] + input[2] = [[:],file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', 
checkIfExists: true)] + input[3] = [[:],file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', checkIfExists: true)] input[4] = [[:],[]] input[5] = false """ @@ -182,7 +182,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.version).match() } + { assert snapshot(process.out.versions).match() } ) } @@ -195,13 +195,13 @@ nextflow_process { process { """ input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) - ] - input[1] = [[:],file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) + ] + input[1] = [[:],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] input[2] = [[:],[]] - input[3] = [[:],file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true)] + input[3] = [[:],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/baits.bed', checkIfExists: true)] input[4] = [[:],[]] input[5] = false """ @@ -211,7 +211,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.version).match() } + { assert snapshot(process.out.versions).match() } ) } @@ -228,11 +228,12 @@ nextflow_process { input[0] = [ [ id:'test'], // meta map [], - [file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + [ + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) ] - ] - input[1] = [[:],file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + ] + input[1] = [[:],file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)] input[2] = [[:],[]] input[3] = [[:],[]] input[4] = [[:],[]] @@ -244,7 +245,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.version).match() } + { assert snapshot(process.out.versions).match() } ) } @@ -260,12 +261,12 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) - ] - input[1] = [[:],file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) + ] + input[1] = [[:],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] input[2] = [[:],[]] - input[3] = [[:],file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true)] + input[3] = [[:],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/baits.bed', checkIfExists: true)] input[4] = [[:],[]] input[5] = false """ @@ -275,7 +276,11 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.bed).match() } + { assert snapshot( + process.out.bed, + process.out.versions + 
).match() + } ) } diff --git a/modules/nf-core/cnvkit/batch/tests/main.nf.test.snap b/modules/nf-core/cnvkit/batch/tests/main.nf.test.snap index 5d7cb14..205d43f 100644 --- a/modules/nf-core/cnvkit/batch/tests/main.nf.test.snap +++ b/modules/nf-core/cnvkit/batch/tests/main.nf.test.snap @@ -1,19 +1,27 @@ { "cnvkit batch tumouronly mode - bam": { - "content": null, + "content": [ + [ + "versions.yml:md5,5737e02065ca6359586a4078708c73e6" + ] + ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-03-20T14:30:55.150317561" + "timestamp": "2024-08-07T10:07:07.53837" }, "cnvkit batch tumouronly mode - cram": { - "content": null, + "content": [ + [ + "versions.yml:md5,0310a792526148b05f434944a1167835" + ] + ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-03-20T14:31:31.039652656" + "timestamp": "2024-08-07T10:07:48.900117" }, "cnvkit batch - bam - stub": { "content": [ @@ -27,60 +35,87 @@ "baits.target.bed:md5,26d25ff2d6c45b6d92169b3559c6acdb" ] ] + ], + [ + "versions.yml:md5,5737e02065ca6359586a4078708c73e6" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-03-20T14:33:25.790391941" + "timestamp": "2024-08-07T10:09:40.098703" }, "cnvkit batch wgs - bam": { - "content": null, + "content": [ + [ + "versions.yml:md5,5737e02065ca6359586a4078708c73e6" + ] + ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-03-20T14:30:10.677690173" + "timestamp": "2024-08-07T10:06:25.023798" }, "cnvkit batch germline hybrid mode - bam": { - "content": null, + "content": [ + [ + "versions.yml:md5,5737e02065ca6359586a4078708c73e6" + ] + ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-03-20T14:32:50.217076532" + "timestamp": "2024-08-07T10:09:19.191221" }, "cnvkit batch hybrid mode - bam": { - "content": null, + "content": 
[ + [ + "versions.yml:md5,5737e02065ca6359586a4078708c73e6" + ] + ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-03-20T13:54:41.69602289" + "timestamp": "2024-08-07T10:06:10.438545" }, "cnvkit batch wgs - cram": { - "content": null, + "content": [ + [ + "versions.yml:md5,0310a792526148b05f434944a1167835" + ] + ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-03-20T14:30:27.271060826" + "timestamp": "2024-08-07T10:06:39.492881" }, "cnvkit batch pon mode - bam": { - "content": null, + "content": [ + [ + "versions.yml:md5,5737e02065ca6359586a4078708c73e6" + ] + ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-03-20T14:33:06.391306794" + "timestamp": "2024-08-07T10:09:29.636924" }, "cnvkit batch germline mode - cram": { - "content": null, + "content": [ + [ + "versions.yml:md5,0310a792526148b05f434944a1167835" + ] + ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" }, - "timestamp": "2024-03-20T14:32:23.38326412" + "timestamp": "2024-08-07T10:09:07.307311" } } \ No newline at end of file diff --git a/modules/nf-core/gatk4/annotateintervals/environment.yml b/modules/nf-core/gatk4/annotateintervals/environment.yml index 523753c..55993f4 100644 --- a/modules/nf-core/gatk4/annotateintervals/environment.yml +++ b/modules/nf-core/gatk4/annotateintervals/environment.yml @@ -1,7 +1,5 @@ -name: gatk4_annotateintervals channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/annotateintervals/meta.yml b/modules/nf-core/gatk4/annotateintervals/meta.yml index 1c8e77f..ee644c9 100644 --- a/modules/nf-core/gatk4/annotateintervals/meta.yml +++ b/modules/nf-core/gatk4/annotateintervals/meta.yml @@ -1,5 +1,6 @@ name: "gatk4_annotateintervals" -description: Annotates intervals with GC content, mappability, and 
segmental-duplication content +description: Annotates intervals with GC content, mappability, and segmental-duplication + content keywords: - annotateintervals - annotation @@ -16,95 +17,105 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - intervals: - type: file - description: One or more interval files to annotate - pattern: "*.{interval_list,list,bed}" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - fasta: - type: file - description: The reference FASTA file - pattern: "*.{fasta,fa}" - - meta3: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - fasta_fai: - type: file - description: The index of the reference FASTA file - pattern: "*.fai" - - meta4: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - dict: - type: file - description: The sequence dictionary reference FASTA file - pattern: "*.dict" - - meta5: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - mappable_regions: - type: file - description: | - Optional - Umap single-read mappability track - The track should correspond to the appropriate read length and overlapping intervals must be merged - pattern: "*.bed(.gz)?" - - meta6: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - mappable_regions_tbi: - type: file - description: Optional - The index of the gzipped umap single-read mappability track - pattern: "*.bed.gz.tbi" - - meta7: - type: map - description: | - Groovy Map containing reference information - e.g. 
[ id:'test' ] - - segmental_duplication_regions: - type: file - description: Optional - Segmental-duplication track - pattern: "*.bed(.gz)?" - - meta8: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - segmental_duplication_regions_tbi: - type: file - description: Optional - The index of the gzipped segmental-duplication track - pattern: "*.bed.gz.tbi" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals: + type: file + description: One or more interval files to annotate + pattern: "*.{interval_list,list,bed}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: The reference FASTA file + pattern: "*.{fasta,fa}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta_fai: + type: file + description: The index of the reference FASTA file + pattern: "*.fai" + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - dict: + type: file + description: The sequence dictionary reference FASTA file + pattern: "*.dict" + - - meta5: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - mappable_regions: + type: file + description: | + Optional - Umap single-read mappability track + The track should correspond to the appropriate read length and overlapping intervals must be merged + pattern: "*.bed(.gz)?" + - - meta6: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - mappable_regions_tbi: + type: file + description: Optional - The index of the gzipped umap single-read mappability + track + pattern: "*.bed.gz.tbi" + - - meta7: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - segmental_duplication_regions: + type: file + description: Optional - Segmental-duplication track + pattern: "*.bed(.gz)?" + - - meta8: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - segmental_duplication_regions_tbi: + type: file + description: Optional - The index of the gzipped segmental-duplication track + pattern: "*.bed.gz.tbi" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - annotated_intervals: - type: file - description: The output TSV file with a SAM-style header containing the annotated intervals - pattern: "*.tsv" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tsv": + type: file + description: The output TSV file with a SAM-style header containing the annotated + intervals + pattern: "*.tsv" + - s: + type: file + description: The output TSV file with a SAM-style header containing the annotated + intervals + pattern: "*.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@nvnieuwk" maintainers: diff --git a/modules/nf-core/gatk4/annotateintervals/tests/main.nf.test b/modules/nf-core/gatk4/annotateintervals/tests/main.nf.test index 8d411c6..0f81e1e 100644 --- a/modules/nf-core/gatk4/annotateintervals/tests/main.nf.test +++ b/modules/nf-core/gatk4/annotateintervals/tests/main.nf.test @@ -17,11 +17,11 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) ] - input[1] = [[:], 
file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] // fasta - input[2] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)] // fasta_fai - input[3] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)] // dict + input[1] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)] // fasta + input[2] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)] // fasta_fai + input[3] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true)] // dict input[4] = [[:],[]] input[5] = [[:],[]] input[6] = [[:],[]] @@ -46,13 +46,13 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map [ - file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['genome_multi_interval_bed'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true) ] ] - input[1] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] // fasta - input[2] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)] // fasta_fai - input[3] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)] // dict + input[1] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)] // fasta + input[2] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)] // fasta_fai + input[3] = [[:], 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true)] // dict input[4] = [[:],[]] input[5] = [[:],[]] input[6] = [[:],[]] @@ -76,11 +76,11 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['homo_sapiens']['genome']['genome_interval_list'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.interval_list', checkIfExists: true) ] - input[1] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] // fasta - input[2] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)] // fasta_fai - input[3] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)] // dict + input[1] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)] // fasta + input[2] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)] // fasta_fai + input[3] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true)] // dict input[4] = [[:],[]] input[5] = [[:],[]] input[6] = [[:],[]] @@ -104,13 +104,13 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['homo_sapiens']['genome']['genome_interval_list'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.interval_list', checkIfExists: true) ] - input[1] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] // fasta - input[2] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)] // fasta_fai - input[3] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)] // dict - input[4] = 
[[:], file(params.test_data['homo_sapiens']['genome']['genome_bed_gz'], checkIfExists: true)] // mappable_regions - input[5] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_bed_gz_tbi'], checkIfExists: true)] // mappable_regions_tbi + input[1] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)] // fasta + input[2] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)] // fasta_fai + input[3] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true)] // dict + input[4] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed.gz', checkIfExists: true)] // mappable_regions + input[5] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed.gz.tbi', checkIfExists: true)] // mappable_regions_tbi input[6] = [[:],[]] input[7] = [[:],[]] """ @@ -132,15 +132,15 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['homo_sapiens']['genome']['genome_interval_list'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.interval_list', checkIfExists: true) ] - input[1] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] // fasta - input[2] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)] // fasta_fai - input[3] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)] // dict + input[1] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)] // fasta + input[2] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)] // fasta_fai + input[3] = [[:], 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true)] // dict input[4] = [[:],[]] input[5] = [[:],[]] - input[6] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_bed_gz'], checkIfExists: true)] // segmental_duplication_regions - input[7] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_bed_gz_tbi'], checkIfExists: true)] // segmental_duplication_regions_tbi + input[6] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed.gz', checkIfExists: true)] // segmental_duplication_regions + input[7] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed.gz.tbi', checkIfExists: true)] // segmental_duplication_regions_tbi """ } } @@ -162,11 +162,11 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) ] - input[1] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] // fasta - input[2] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)] // fasta_fai - input[3] = [[:], file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)] // dict + input[1] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)] // fasta + input[2] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)] // fasta_fai + input[3] = [[:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true)] // dict input[4] = [[:],[]] input[5] = [[:],[]] input[6] = [[:],[]] diff --git a/modules/nf-core/gatk4/bedtointervallist/environment.yml 
b/modules/nf-core/gatk4/bedtointervallist/environment.yml index d6fbe2e..55993f4 100644 --- a/modules/nf-core/gatk4/bedtointervallist/environment.yml +++ b/modules/nf-core/gatk4/bedtointervallist/environment.yml @@ -1,7 +1,5 @@ -name: gatk4_bedtointervallist channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/bedtointervallist/meta.yml b/modules/nf-core/gatk4/bedtointervallist/meta.yml index 187da88..25348e1 100644 --- a/modules/nf-core/gatk4/bedtointervallist/meta.yml +++ b/modules/nf-core/gatk4/bedtointervallist/meta.yml @@ -15,34 +15,45 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test'] - - bed: - type: file - description: Input bed file - pattern: "*.bed" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'genome' ] - - dict: - type: file - description: Sequence dictionary - pattern: "*.dict" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - bed: + type: file + description: Input bed file + pattern: "*.bed" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - dict: + type: file + description: Sequence dictionary + pattern: "*.dict" output: - interval_list: - type: file - description: gatk interval list file - pattern: "*.interval_list" + - meta: + type: file + description: gatk interval list file + pattern: "*.interval_list" + - "*.interval_list": + type: file + description: gatk interval list file + pattern: "*.interval_list" + - _list: + type: file + description: gatk interval list file + pattern: "*.interval_list" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@kevinmenden" - "@ramprasadn" diff --git a/modules/nf-core/gatk4/collectreadcounts/environment.yml b/modules/nf-core/gatk4/collectreadcounts/environment.yml index d09cd89..55993f4 100644 --- a/modules/nf-core/gatk4/collectreadcounts/environment.yml +++ b/modules/nf-core/gatk4/collectreadcounts/environment.yml @@ -1,7 +1,5 @@ -name: gatk4_collectreadcounts channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/collectreadcounts/meta.yml b/modules/nf-core/gatk4/collectreadcounts/meta.yml index 25fb8b8..cf8c773 100644 --- a/modules/nf-core/gatk4/collectreadcounts/meta.yml +++ b/modules/nf-core/gatk4/collectreadcounts/meta.yml @@ -1,5 +1,6 @@ name: "gatk4_collectreadcounts" -description: Collects read counts at specified intervals. The count for each interval is calculated by counting the number of read starts that lie in the interval. +description: Collects read counts at specified intervals. The count for each interval + is calculated by counting the number of read starts that lie in the interval. keywords: - collectreadcounts - bam @@ -7,75 +8,88 @@ keywords: - gatk4 tools: - gatk4: - description: Genome Analysis Toolkit (GATK4). 
Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools with a primary focus on variant discovery and genotyping. Its powerful processing engine and high-performance computing features make it capable of taking on projects of any size. + description: Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform + at the Broad Institute, the toolkit offers a wide variety of tools with a primary + focus on variant discovery and genotyping. Its powerful processing engine and + high-performance computing features make it capable of taking on projects of + any size. homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037593911-CombineGVCFs tool_dev_url: https://github.com/broadinstitute/gatk doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - meta3: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - meta4: - type: map - description: | - Groovy Map containing reference information - e.g. 
[ id:'test' ] - - input: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - input_index: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - intervals: - type: file - description: A file containing the specified intervals - pattern: "*.{bed,intervals}" - - fasta: - type: file - description: Optional - Reference FASTA - pattern: "*.{fasta,fa}" - - fai: - type: file - description: Optional - Index of the reference FASTA file - pattern: "*.fai" - - dict: - type: file - description: Optional - Sequence dictionary of the reference FASTA file - pattern: "*.dict" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - input_index: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - intervals: + type: file + description: A file containing the specified intervals + pattern: "*.{bed,intervals}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Optional - Reference FASTA + pattern: "*.{fasta,fa}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: Optional - Index of the reference FASTA file + pattern: "*.fai" + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - dict: + type: file + description: Optional - Sequence dictionary of the reference FASTA file + pattern: "*.dict" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - hdf5: - type: file - description: The read counts in hdf5 format - pattern: "*.hdf5" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.hdf5": + type: file + description: The read counts in hdf5 format + pattern: "*.hdf5" - tsv: - type: file - description: The read counts in TSV format - pattern: "*.tsv" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tsv": + type: file + description: The read counts in TSV format + pattern: "*.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@nvnieuwk" maintainers: diff --git a/modules/nf-core/gatk4/collectreadcounts/tests/main.nf.test b/modules/nf-core/gatk4/collectreadcounts/tests/main.nf.test new file mode 100644 index 0000000..0832cf8 --- /dev/null +++ b/modules/nf-core/gatk4/collectreadcounts/tests/main.nf.test @@ -0,0 +1,104 @@ + +nextflow_process { + + name "Test Process GATK4_COLLECTREADCOUNTS" + script "../main.nf" + process "GATK4_COLLECTREADCOUNTS" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/collectreadcounts" + + test("test-gatk4-collectreadcounts-hdf5") { + + config "./nextflow.hdf5.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true), + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + + """ + 
} + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.hdf5[0][1]).name, + process.out.tsv, + ).match() + } + ) + } + } + + test("test-gatk4-collectreadcounts-tsv") { + + config "./nextflow.tsv.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true), + ] + input[1] = [[],[]] + input[2] = [[],[]] + input[3] = [[],[]] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test-gatk4-collectreadcounts-cram") { + + config "./nextflow.cram.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true), + ] + input[1] = [[id:'test'], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)] + input[2] = [[id:'test'], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)] + input[3] = [[id:'test'], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true)] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} 
diff --git a/modules/nf-core/gatk4/collectreadcounts/tests/main.nf.test.snap b/modules/nf-core/gatk4/collectreadcounts/tests/main.nf.test.snap new file mode 100644 index 0000000..28a3a6e --- /dev/null +++ b/modules/nf-core/gatk4/collectreadcounts/tests/main.nf.test.snap @@ -0,0 +1,97 @@ +{ + "test-gatk4-collectreadcounts-cram": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d9a32039b7a84f5bb74e8382e5427670" + ] + ], + "2": [ + "versions.yml:md5,ebf23f4ab63948ba97df07035f8d2659" + ], + "hdf5": [ + + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,d9a32039b7a84f5bb74e8382e5427670" + ] + ], + "versions": [ + "versions.yml:md5,ebf23f4ab63948ba97df07035f8d2659" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-26T12:58:19.610687" + }, + "test-gatk4-collectreadcounts-hdf5": { + "content": [ + "test.hdf5", + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-26T13:02:48.607644" + }, + "test-gatk4-collectreadcounts-tsv": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,8e45a6164916c303387f39f02ce45841" + ] + ], + "2": [ + "versions.yml:md5,ebf23f4ab63948ba97df07035f8d2659" + ], + "hdf5": [ + + ], + "tsv": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,8e45a6164916c303387f39f02ce45841" + ] + ], + "versions": [ + "versions.yml:md5,ebf23f4ab63948ba97df07035f8d2659" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-26T12:58:07.500024" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/collectreadcounts/tests/nextflow.cram.config b/modules/nf-core/gatk4/collectreadcounts/tests/nextflow.cram.config new file mode 100644 index 0000000..682bdca --- /dev/null +++ b/modules/nf-core/gatk4/collectreadcounts/tests/nextflow.cram.config @@ 
-0,0 +1,5 @@ +process { + withName: 'GATK4_COLLECTREADCOUNTS' { + ext.args = "--format TSV --interval-merging-rule OVERLAPPING_ONLY" + } +} diff --git a/modules/nf-core/gatk4/collectreadcounts/tests/nextflow.hdf5.config b/modules/nf-core/gatk4/collectreadcounts/tests/nextflow.hdf5.config new file mode 100644 index 0000000..d6ca881 --- /dev/null +++ b/modules/nf-core/gatk4/collectreadcounts/tests/nextflow.hdf5.config @@ -0,0 +1,5 @@ +process { + withName: 'GATK4_COLLECTREADCOUNTS'{ + ext.args = "--interval-merging-rule OVERLAPPING_ONLY" + } +} diff --git a/modules/nf-core/gatk4/collectreadcounts/tests/nextflow.tsv.config b/modules/nf-core/gatk4/collectreadcounts/tests/nextflow.tsv.config new file mode 100644 index 0000000..682bdca --- /dev/null +++ b/modules/nf-core/gatk4/collectreadcounts/tests/nextflow.tsv.config @@ -0,0 +1,5 @@ +process { + withName: 'GATK4_COLLECTREADCOUNTS' { + ext.args = "--format TSV --interval-merging-rule OVERLAPPING_ONLY" + } +} diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml b/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml index b4f4e77..55993f4 100644 --- a/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml @@ -1,7 +1,5 @@ -name: gatk4_createreadcountpanelofnormals channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml b/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml index ba01f63..cbbed8c 100644 --- a/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml @@ -1,5 +1,6 @@ name: "gatk4_createreadcountpanelofnormals" -description: Creates a panel of normals (PoN) for read-count denoising given the read counts for samples in the panel. 
+description: Creates a panel of normals (PoN) for read-count denoising given the read + counts for samples in the panel. keywords: - createreadcountpanelofnormals - gatk4 @@ -15,30 +16,33 @@ tools: doi: 10.1158/1538-7445.AM2017-3590 tool_dev_url: "https://github.com/broadinstitute/gatk" licence: ["Apache-2.0"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test', single_end:false ]` - - counts: - type: file - description: Read counts in hdf5 or tsv format. - pattern: "*.{hdf5,tsv}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - counts: + type: file + description: Read counts in hdf5 or tsv format. + pattern: "*.{hdf5,tsv}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test', single_end:false ]` - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - pon: - type: file - description: Panel-of-normals file. - pattern: "*.{hdf5}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.hdf5": + type: file + description: Panel-of-normals file. 
+ pattern: "*.{hdf5}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@ramprasadn" maintainers: diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml b/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml index ae543c6..55993f4 100644 --- a/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml @@ -1,7 +1,5 @@ -name: gatk4_createsomaticpanelofnormals channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml b/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml index 9c3ee19..45d88c1 100644 --- a/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml @@ -1,5 +1,6 @@ name: gatk4_createsomaticpanelofnormals -description: Create a panel of normals contraining germline and artifactual sites for use with mutect2. +description: Create a panel of normals contraining germline and artifactual sites + for use with mutect2. keywords: - createsomaticpanelofnormals - gatk4 @@ -13,56 +14,67 @@ tools: homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test'] - - genoomicsdb: - type: directory - description: genomicsDB workspace that contains the samples to create the somatic panel of normals with. - pattern: "*_genomicsDBworkspace" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. 
[ id:'test'] - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - meta3: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test'] - - fai: - type: file - description: Index of reference fasta file - pattern: "*.fasta.fai" - - meta4: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test'] - - dict: - type: file - description: GATK sequence dictionary - pattern: "*.dict" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - genomicsdb: + type: file + description: GenomicsDB database + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test'] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test'] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test'] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" output: - vcf: - type: file - description: panel of normal as compressed vcf file - pattern: "*.vcf.gz" + - meta: + type: file + description: panel of normal as compressed vcf file + pattern: "*.vcf.gz" + - "*.vcf.gz": + type: file + description: panel of normal as compressed vcf file + pattern: "*.vcf.gz" - tbi: - type: file - description: Tabix index of vcf file - pattern: "*vcf.gz.tbi" + - meta: + type: file + description: Tabix index of vcf file + pattern: "*vcf.gz.tbi" + - "*.tbi": + type: file + description: Tabix index of vcf file + pattern: "*vcf.gz.tbi" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@GCJMackenzie" maintainers: diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf b/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf index 71a67f7..fc98e80 100644 --- a/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf @@ -40,6 +40,8 @@ process GATK4_DETERMINEGERMLINECONTIGPLOIDY { } """ export THEANO_FLAGS="base_compiledir=\$PWD" + export OMP_NUM_THREADS=${task.cpus} + export MKL_NUM_THREADS=${task.cpus} gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ DetermineGermlineContigPloidy \\ diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml b/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml index 56eb408..828628b 100644 --- a/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml @@ -1,5 +1,6 @@ name: "gatk4_determinegermlinecontigploidy" -description: Determines the baseline contig ploidy for germline samples given counts data +description: Determines the baseline contig ploidy for 
germline samples given counts + data keywords: - copy number - counts @@ -15,59 +16,70 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - meta2: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - counts: - type: file - description: One or more count TSV files created with gatk/collectreadcounts - pattern: "*.tsv" - - bed: - type: file - description: Optional - A bed file containing the intervals to include in the process - pattern: "*.bed" - - exclude_beds: - type: file - description: Optional - One or more bed files containing intervals to exclude from the process - pattern: "*.bed" - - contig_ploidy_table: - type: file - description: The contig ploidy priors table - pattern: "*.tsv" - - ploidy_model: - type: directory - description: | - Optional - A folder containing the ploidy model. - When a model is supplied to tool will run in CASE mode. - pattern: '*-model/' + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - counts: + type: file + description: One or more count TSV files created with gatk/collectreadcounts + pattern: "*.tsv" + - bed: + type: file + description: Optional - A bed file containing the intervals to include in the + process + pattern: "*.bed" + - exclude_beds: + type: file + description: Optional - One or more bed files containing intervals to exclude + from the process + pattern: "*.bed" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ploidy_model: + type: directory + description: | + Optional - A folder containing the ploidy model. + When a model is supplied to tool will run in CASE mode. 
+ pattern: '*-model/' + - - contig_ploidy_table: + type: file + description: The contig ploidy priors table + pattern: "*.tsv" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - calls: - type: directory - description: A folder containing the calls from the input files - pattern: "*-calls/" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}-calls: + type: directory + description: A folder containing the calls from the input files + pattern: "*-calls/" - model: - type: directory - description: | - A folder containing the model from the input files. - This will only be created in COHORT mode (when no model is supplied to the process). - pattern: "*-model/" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}-model: + type: directory + description: | + A folder containing the model from the input files. + This will only be created in COHORT mode (when no model is supplied to the process). 
+ pattern: "*-model/" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@nvnieuwk" maintainers: diff --git a/modules/nf-core/gatk4/filterintervals/environment.yml b/modules/nf-core/gatk4/filterintervals/environment.yml index 853995e..55993f4 100644 --- a/modules/nf-core/gatk4/filterintervals/environment.yml +++ b/modules/nf-core/gatk4/filterintervals/environment.yml @@ -1,7 +1,5 @@ -name: gatk4_filterintervals channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/filterintervals/meta.yml b/modules/nf-core/gatk4/filterintervals/meta.yml index 425c001..87376ed 100644 --- a/modules/nf-core/gatk4/filterintervals/meta.yml +++ b/modules/nf-core/gatk4/filterintervals/meta.yml @@ -6,53 +6,63 @@ keywords: - interval_list tools: - "gatk4": - description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools with a primary focus on variant discovery and genotyping. Its powerful processing engine and high-performance computing features make it capable of taking on projects of any size. + description: Developed in the Data Sciences Platform at the Broad Institute, the + toolkit offers a wide variety of tools with a primary focus on variant discovery + and genotyping. Its powerful processing engine and high-performance computing + features make it capable of taking on projects of any size. homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: "10.1158/1538-7445.AM2017-3590" licence: ["Apache-2.0"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing reference information - e.g. 
[ id:'test' ] - - intervals: - type: file - description: Processed interval list file (processed_intervals.interval_list) - pattern: "*.interval_list" - - meta2: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - read_counts: - type: file - description: Read counts input file - pattern: "*.{tsv, hdf5}" - - meta3: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - annotated_intervals: - type: file - description: Annotated intervals TSV file (annotated_intervals.tsv). - pattern: "*.tsv" + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - intervals: + type: file + description: Processed interval list file (processed_intervals.interval_list) + pattern: "*.interval_list" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - read_counts: + type: file + description: Read counts input file + pattern: "*.{tsv, hdf5}" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - annotated_intervals: + type: file + description: Annotated intervals TSV file (annotated_intervals.tsv). + pattern: "*.tsv" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - interval_list: - type: file - description: Filtered interval list file - pattern: "*.interval_list" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.interval_list": + type: file + description: Filtered interval list file + pattern: "*.interval_list" + - _list: + type: file + description: Filtered interval list file + pattern: "*.interval_list" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@ryanjameskennedy" - "@ViktorHy" diff --git a/modules/nf-core/gatk4/filterintervals/tests/main.nf.test b/modules/nf-core/gatk4/filterintervals/tests/main.nf.test new file mode 100644 index 0000000..a84f562 --- /dev/null +++ b/modules/nf-core/gatk4/filterintervals/tests/main.nf.test @@ -0,0 +1,38 @@ + +nextflow_process { + + name "Test Process GATK4_FILTERINTERVALS" + script "../main.nf" + process "GATK4_FILTERINTERVALS" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/filterintervals" + + test("test-gatk4-filterintervals") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.preprocessed_intervals.interval_list', checkIfExists: true) + ] + input[1] = [ [:], [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.preprocessed_intervals.counts.tsv', checkIfExists: true) ] ] + input[2] = [ [:], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.annotated_intervals.tsv', checkIfExists: true) ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/gatk4/filterintervals/tests/main.nf.test.snap b/modules/nf-core/gatk4/filterintervals/tests/main.nf.test.snap new file mode 100644 index 0000000..c962666 --- /dev/null +++ b/modules/nf-core/gatk4/filterintervals/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "test-gatk4-filterintervals": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + 
"test.interval_list:md5,67b15dff732693db3542e6b1dc30a5da" + ] + ], + "1": [ + "versions.yml:md5,9a445090a815c06982d5deb5ed7d5e30" + ], + "interval_list": [ + [ + { + "id": "test" + }, + "test.interval_list:md5,67b15dff732693db3542e6b1dc30a5da" + ] + ], + "versions": [ + "versions.yml:md5,9a445090a815c06982d5deb5ed7d5e30" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-26T12:25:35.933532" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/filterintervals/tests/nextflow.config b/modules/nf-core/gatk4/filterintervals/tests/nextflow.config new file mode 100644 index 0000000..93ea379 --- /dev/null +++ b/modules/nf-core/gatk4/filterintervals/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GATK4_FILTERINTERVALS { + ext.args = "--interval-merging-rule OVERLAPPING_ONLY" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/genomicsdbimport/environment.yml b/modules/nf-core/gatk4/genomicsdbimport/environment.yml index a3a1363..55993f4 100644 --- a/modules/nf-core/gatk4/genomicsdbimport/environment.yml +++ b/modules/nf-core/gatk4/genomicsdbimport/environment.yml @@ -1,7 +1,5 @@ -name: gatk4_genomicsdbimport channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/genomicsdbimport/meta.yml b/modules/nf-core/gatk4/genomicsdbimport/meta.yml index 11e565b..174ae2e 100644 --- a/modules/nf-core/gatk4/genomicsdbimport/meta.yml +++ b/modules/nf-core/gatk4/genomicsdbimport/meta.yml @@ -1,5 +1,6 @@ name: gatk4_genomicsdbimport -description: merge GVCFs from multiple samples. For use in joint genotyping or somatic panel of normal creation. +description: merge GVCFs from multiple samples. For use in joint genotyping or somatic + panel of normal creation. 
keywords: - gatk4 - genomicsdb @@ -15,61 +16,99 @@ tools: homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test'] - - vcf: - type: list - description: either a list of vcf files to be used to create or update a genomicsdb, or a file that contains a map to vcf files to be used. - pattern: "*.vcf.gz" - - tbi: - type: list - description: list of tbi files that match with the input vcf files - pattern: "*.vcf.gz_tbi" - - wspace: - type: file - description: path to an existing genomicsdb to be used in update db mode or get intervals mode. This WILL NOT specify name of a new genomicsdb in create db mode. - pattern: "/path/to/existing/gendb" - - interval_file: - type: file - description: file containing the intervals to be used when creating the genomicsdb - pattern: "*.interval_list" - - interval_value: - type: string - description: if an intervals file has not been spcified, the value enetered here will be used as an interval via the "-L" argument - pattern: "example: chr1:1000-10000" - - run_intlist: - type: boolean - description: Specify whether to run get interval list mode, this option cannot be specified at the same time as run_updatewspace. - pattern: "true/false" - - run_updatewspace: - type: boolean - description: Specify whether to run update genomicsdb mode, this option takes priority over run_intlist. - pattern: "true/false" - - input_map: - type: boolean - description: Specify whether the vcf input is providing a list of vcf file(s) or a single file containing a map of paths to vcf files to be used to create or update a genomicsdb. - pattern: "*.sample_map" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test'] + - vcf: + type: list + description: either a list of vcf files to be used to create or update a genomicsdb, + or a file that contains a map to vcf files to be used. + pattern: "*.vcf.gz" + - tbi: + type: list + description: list of tbi files that match with the input vcf files + pattern: "*.vcf.gz_tbi" + - interval_file: + type: file + description: file containing the intervals to be used when creating the genomicsdb + pattern: "*.interval_list" + - interval_value: + type: string + description: if an intervals file has not been spcified, the value enetered + here will be used as an interval via the "-L" argument + pattern: "example: chr1:1000-10000" + - wspace: + type: file + description: path to an existing genomicsdb to be used in update db mode or + get intervals mode. This WILL NOT specify name of a new genomicsdb in create + db mode. + pattern: "/path/to/existing/gendb" + - - run_intlist: + type: boolean + description: Specify whether to run get interval list mode, this option cannot + be specified at the same time as run_updatewspace. + pattern: "true/false" + - - run_updatewspace: + type: boolean + description: Specify whether to run update genomicsdb mode, this option takes + priority over run_intlist. + pattern: "true/false" + - - input_map: + type: boolean + description: Specify whether the vcf input is providing a list of vcf file(s) + or a single file containing a map of paths to vcf files to be used to create + or update a genomicsdb. 
+ pattern: "*.sample_map" output: - genomicsdb: - type: directory - description: Directory containing the files that compose the genomicsdb workspace, this is only output for create mode, as update changes an existing db - pattern: "*/$prefix" + - meta: + type: directory + description: Directory containing the files that compose the genomicsdb workspace, + this is only output for create mode, as update changes an existing db + pattern: "*/$prefix" + - $prefix: + type: directory + description: Directory containing the files that compose the genomicsdb workspace, + this is only output for create mode, as update changes an existing db + pattern: "*/$prefix" - updatedb: - type: directory - description: Directory containing the files that compose the updated genomicsdb workspace, this is only output for update mode, and should be the same path as the input wspace. - pattern: "same/path/as/wspace" + - meta: + type: directory + description: Directory containing the files that compose the updated genomicsdb + workspace, this is only output for update mode, and should be the same path + as the input wspace. + pattern: "same/path/as/wspace" + - $updated_db: + type: directory + description: Directory containing the files that compose the updated genomicsdb + workspace, this is only output for update mode, and should be the same path + as the input wspace. + pattern: "same/path/as/wspace" - intervallist: - type: file - description: File containing the intervals used to generate the genomicsdb, only created by get intervals mode. - pattern: "*.interval_list" + - meta: + type: file + description: File containing the intervals used to generate the genomicsdb, + only created by get intervals mode. + pattern: "*.interval_list" + - "*.interval_list": + type: file + description: File containing the intervals used to generate the genomicsdb, + only created by get intervals mode. 
+ pattern: "*.interval_list" + - list: + type: file + description: File containing the intervals used to generate the genomicsdb, + only created by get intervals mode. + pattern: "*.interval_list" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@GCJMackenzie" maintainers: diff --git a/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test index 9c207b3..5fef5dd 100644 --- a/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test +++ b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test @@ -17,11 +17,11 @@ nextflow_process { """ // [meta, vcf, tbi, interval, interval_value, workspace ] input[0] = [ [ id:'test'], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) , - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true) , - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.interval_list', checkIfExists: true) , - [] , - [] ] + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) , + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true) , + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.interval_list', checkIfExists: true) , + [] , + [] ] // run_intlist input[1] = false // run_updatewspace @@ -36,12 +36,14 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.genomicsdb.get(0).get(1)).list().sort()).match() } //{ assert snapshot(file(process.out.updatedb.get(0).get(1)).list().sort()).match() } //{ assert 
snapshot(process.out.intervallist.get(0).get(1)).match() } + { assert snapshot( + file(process.out.genomicsdb.get(0).get(1)).list().sort(), + process.out.versions + ).match() } ) } - } test("test_gatk4_genomicsdbimport_get_intervalslist") { @@ -76,10 +78,12 @@ nextflow_process { { assert process.success }, //{ assert snapshot(file(process.out.genomicsdb.get(0).get(1)).list().sort()).match() } //{ assert snapshot(file(process.out.updatedb.get(0).get(1)).list().sort()).match() } - { assert snapshot(process.out.intervallist.get(0).get(1)).match() } + { assert snapshot( + process.out.intervallist.get(0).get(1), + process.out.versions + ).match() } ) } - } test("test_gatk4_genomicsdbimport_update_genomicsdb") { @@ -113,11 +117,13 @@ nextflow_process { assertAll( { assert process.success }, //{ assert snapshot(file(process.out.genomicsdb.get(0).get(1)).list().sort()).match() } - { assert snapshot(file(process.out.updatedb.get(0).get(1)).list().sort()).match() } //{ assert snapshot(process.out.intervallist.get(0).get(1)).match() } + { assert snapshot( + file(process.out.updatedb.get(0).get(1)).list().sort(), + process.out.versions + ).match() } ) } - } test("test_gatk4_genomicsdbimport_stub") { @@ -129,11 +135,11 @@ nextflow_process { """ // [meta, vcf, tbi, interval, interval_value, workspace ] input[0] = [ [ id:'test'], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) , - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true) , - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.interval_list', checkIfExists: true) , - [] , - [] ] + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) , + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true) , + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.interval_list', checkIfExists: true) , + [] , + [] ] // run_intlist input[1] = false // run_updatewspace @@ -147,9 +153,8 @@ nextflow_process { then { assertAll( { assert process.success }, + { assert snapshot(process.out).match()} ) } - } - } diff --git a/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap index a633bbd..55ced0d 100644 --- a/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap +++ b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap @@ -1,40 +1,98 @@ { "test_gatk4_genomicsdbimport_get_intervalslist": { "content": [ - "test.interval_list:md5,4c85812ac15fc1cd29711a851d23c0bf" + "test.interval_list:md5,4c85812ac15fc1cd29711a851d23c0bf", + [ + "versions.yml:md5,c1233a04213021aa66599a36e0fb28cc" + ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-28T17:55:03.846241" + "timestamp": "2024-07-09T10:42:51.836379" }, "test_gatk4_genomicsdbimport_create_genomicsdb": { "content": [ - "__tiledb_workspace.tdb", - "callset.json", - "chr22$1$40001", - "vcfheader.vcf", - "vidmap.json" + [ + "__tiledb_workspace.tdb", + "callset.json", + "chr22$1$40001", + "vcfheader.vcf", + "vidmap.json" + ], + [ + "versions.yml:md5,c1233a04213021aa66599a36e0fb28cc" + ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-14T11:22:10.11423157" + "timestamp": "2024-07-09T10:42:36.846239" }, "test_gatk4_genomicsdbimport_update_genomicsdb": { "content": [ - "__tiledb_workspace.tdb", - "callset.json", - "chr22$1$40001", - "vcfheader.vcf", - "vidmap.json" + [ + "__tiledb_workspace.tdb", + "callset.json", + "chr22$1$40001", + "vcfheader.vcf", + "vidmap.json" + ], + [ + "versions.yml:md5,c1233a04213021aa66599a36e0fb28cc" + ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": 
"24.04.2" }, - "timestamp": "2024-02-14T12:46:42.403794676" + "timestamp": "2024-07-09T10:43:09.00769" + }, + "test_gatk4_genomicsdbimport_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,c1233a04213021aa66599a36e0fb28cc" + ], + "genomicsdb": [ + [ + { + "id": "test" + }, + "test:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "intervallist": [ + + ], + "updatedb": [ + + ], + "versions": [ + "versions.yml:md5,c1233a04213021aa66599a36e0fb28cc" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-09T10:43:20.921712" } } \ No newline at end of file diff --git a/modules/nf-core/gatk4/germlinecnvcaller/main.nf b/modules/nf-core/gatk4/germlinecnvcaller/main.nf index cd1916e..90aa7e4 100644 --- a/modules/nf-core/gatk4/germlinecnvcaller/main.nf +++ b/modules/nf-core/gatk4/germlinecnvcaller/main.nf @@ -38,6 +38,8 @@ process GATK4_GERMLINECNVCALLER { } """ export THEANO_FLAGS="base_compiledir=\$PWD" + export OMP_NUM_THREADS=${task.cpus} + export MKL_NUM_THREADS=${task.cpus} gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ GermlineCNVCaller \\ diff --git a/modules/nf-core/gatk4/germlinecnvcaller/meta.yml b/modules/nf-core/gatk4/germlinecnvcaller/meta.yml index d4715ff..a185d9d 100644 --- a/modules/nf-core/gatk4/germlinecnvcaller/meta.yml +++ b/modules/nf-core/gatk4/germlinecnvcaller/meta.yml @@ -1,60 +1,85 @@ name: "gatk4_germlinecnvcaller" -description: Calls copy-number variants in germline samples given their counts and the output of DetermineGermlineContigPloidy. +description: Calls copy-number variants in germline samples given their counts and + the output of DetermineGermlineContigPloidy. 
keywords: - gatk - germline contig ploidy - germlinecnvcaller tools: - "gatk4": - description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools with a primary focus on variant discovery and genotyping. Its powerful processing engine and high-performance computing features make it capable of taking on projects of any size. + description: Developed in the Data Sciences Platform at the Broad Institute, the + toolkit offers a wide variety of tools with a primary focus on variant discovery + and genotyping. Its powerful processing engine and high-performance computing + features make it capable of taking on projects of any size. homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: "10.1158/1538-7445.AM2017-3590" licence: ["Apache-2.0"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - tsv: - type: file - description: One or more count TSV files created with gatk/collectreadcounts - pattern: "*.tsv" - - intervals: - type: file - description: Optional - A bed file containing the intervals to include in the process - pattern: "*.bed" - - model: - type: directory - description: Optional - directory containing the model produced by germlinecnvcaller cohort mode - pattern: "*-cnv-model/*-model" - - ploidy: - type: directory - description: Directory containing ploidy calls produced by determinegermlinecontigploidy case or cohort mode - pattern: "*-calls" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - tsv: + type: file + description: One or more count TSV files created with gatk/collectreadcounts + pattern: "*.tsv" + - intervals: + type: file + description: Optional - A bed file containing the intervals to include in the + process + pattern: "*.bed" + - ploidy: + type: directory + description: Directory containing ploidy calls produced by determinegermlinecontigploidy + case or cohort mode + pattern: "*-calls" + - model: + type: directory + description: Optional - directory containing the model produced by germlinecnvcaller + cohort mode + pattern: "*-cnv-model/*-model" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - cohortcalls: - type: directory - description: Tar gzipped directory containing calls produced by germlinecnvcaller case mode - pattern: "*-cnv-model/*-calls" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-cnv-model/*-calls": + type: directory + description: Tar gzipped directory containing calls produced by germlinecnvcaller + case mode + pattern: "*-cnv-model/*-calls" - cohortmodel: - type: directory - description: Optional - Tar gzipped directory containing the model produced by germlinecnvcaller cohort mode - pattern: "*-cnv-model/*-model" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*-cnv-model/*-model": + type: directory + description: Optional - Tar gzipped directory containing the model produced + by germlinecnvcaller cohort mode + pattern: "*-cnv-model/*-model" - casecalls: - type: directory - description: Tar gzipped directory containing calls produced by germlinecnvcaller case mode - pattern: "*-cnv-calls/*-calls" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-cnv-calls/*-calls": + type: directory + description: Tar gzipped directory containing calls produced by germlinecnvcaller + case mode + pattern: "*-cnv-calls/*-calls" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@ryanjameskennedy" - "@ViktorHy" diff --git a/modules/nf-core/gatk4/indexfeaturefile/environment.yml b/modules/nf-core/gatk4/indexfeaturefile/environment.yml index dccdb15..55993f4 100644 --- a/modules/nf-core/gatk4/indexfeaturefile/environment.yml +++ b/modules/nf-core/gatk4/indexfeaturefile/environment.yml @@ -1,7 +1,5 @@ -name: gatk4_indexfeaturefile channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/indexfeaturefile/meta.yml b/modules/nf-core/gatk4/indexfeaturefile/meta.yml index 575c603..cfc717d 100644 --- a/modules/nf-core/gatk4/indexfeaturefile/meta.yml +++ b/modules/nf-core/gatk4/indexfeaturefile/meta.yml @@ -13,30 +13,33 @@ tools: tool_dev_url: https://github.com/broadinstitute/gatk doi: "10.1158/1538-7445.AM2017-3590" licence: ["BSD-3-clause"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - feature_file: - type: file - description: VCF/BED file - pattern: "*.{vcf,vcf.gz,bed,bed.gz}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - feature_file: + type: file + description: VCF/BED file + pattern: "*.{vcf,vcf.gz,bed,bed.gz}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - index: - type: file - description: Index for VCF/BED file - pattern: "*.{tbi,idx}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{tbi,idx}": + type: file + description: Index for VCF/BED file + pattern: "*.{tbi,idx}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@santiagorevale" maintainers: diff --git a/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test b/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test index ee99a54..994606f 100644 --- a/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test +++ b/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test @@ -17,7 +17,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) ] """ } @@ -40,7 +40,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['genome']['genome_bed_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed.gz', checkIfExists: true) ] """ } @@ -63,7 +63,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf', checkIfExists: true) ] """ } @@ -86,7 +86,7 @@ 
nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) ] """ } diff --git a/modules/nf-core/gatk4/intervallisttools/environment.yml b/modules/nf-core/gatk4/intervallisttools/environment.yml index a4026f9..55993f4 100644 --- a/modules/nf-core/gatk4/intervallisttools/environment.yml +++ b/modules/nf-core/gatk4/intervallisttools/environment.yml @@ -1,7 +1,5 @@ -name: gatk4_intervallisttools channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/intervallisttools/meta.yml b/modules/nf-core/gatk4/intervallisttools/meta.yml index 748dccf..8ee15a2 100644 --- a/modules/nf-core/gatk4/intervallisttools/meta.yml +++ b/modules/nf-core/gatk4/intervallisttools/meta.yml @@ -1,5 +1,6 @@ name: gatk4_intervallisttools -description: Splits the interval list file into unique, equally-sized interval files and place it under a directory +description: Splits the interval list file into unique, equally-sized interval files + and place it under a directory keywords: - bed - gatk4 @@ -15,30 +16,36 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - interval_list: - type: file - description: Interval list file - pattern: "*.interval_list" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals: + type: file + description: Interval file output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - interval_list: - type: file - description: Interval list files - pattern: "*.interval_list" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*_split/*/*.interval_list": + type: file + description: Interval list files + pattern: "*.interval_list" + - _list: + type: file + description: Interval list files + pattern: "*.interval_list" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@praveenraj2018" maintainers: diff --git a/modules/nf-core/gatk4/preprocessintervals/environment.yml b/modules/nf-core/gatk4/preprocessintervals/environment.yml index ec0b09e..55993f4 100644 --- a/modules/nf-core/gatk4/preprocessintervals/environment.yml +++ b/modules/nf-core/gatk4/preprocessintervals/environment.yml @@ -1,7 +1,5 @@ -name: gatk4_preprocessintervals channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/preprocessintervals/meta.yml b/modules/nf-core/gatk4/preprocessintervals/meta.yml index cf3f6ac..0e13674 100644 --- a/modules/nf-core/gatk4/preprocessintervals/meta.yml +++ b/modules/nf-core/gatk4/preprocessintervals/meta.yml @@ -7,71 +7,83 @@ keywords: - preprocessintervals tools: - "gatk4": - description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools with a primary focus on variant discovery and genotyping. Its powerful processing engine and high-performance computing features make it capable of taking on projects of any size. + description: Developed in the Data Sciences Platform at the Broad Institute, the + toolkit offers a wide variety of tools with a primary focus on variant discovery + and genotyping. 
Its powerful processing engine and high-performance computing + features make it capable of taking on projects of any size. homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: "10.1158/1538-7445.AM2017-3590" licence: ["Apache-2.0"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - fai: - type: file - description: Index of reference fasta file - pattern: "*.fasta.fai" - - meta3: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - dict: - type: file - description: GATK sequence dictionary - pattern: "*.dict" - - meta4: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - intervals: - type: file - description: Interval file (bed or interval_list) with the genomic regions to be included from the analysis (optional) - pattern: "*.{bed,interval_list}" - - meta5: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - exclude_intervals: - type: file - description: Interval file (bed or interval_list) with the genomic regions to be excluded from the analysis (optional) - pattern: "*.{bed,interval_list}" + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - intervals: + type: file + description: Interval file (bed or interval_list) with the genomic regions to + be included from the analysis (optional) + pattern: "*.{bed,interval_list}" + - - meta5: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - exclude_intervals: + type: file + description: Interval file (bed or interval_list) with the genomic regions to + be excluded from the analysis (optional) + pattern: "*.{bed,interval_list}" output: - - meta: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - interval_list: - type: file - description: Processed interval list file - pattern: "*.{bed,interval_list}" + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - "*.interval_list": + type: file + description: Processed interval list file + pattern: "*.{bed,interval_list}" + - _list: + type: file + description: Processed interval list file + pattern: "*.{bed,interval_list}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@ryanjameskennedy" - "@ViktorHy" diff --git a/modules/nf-core/gatk4/preprocessintervals/tests/main.nf.test b/modules/nf-core/gatk4/preprocessintervals/tests/main.nf.test new file mode 100644 index 0000000..a546fac --- /dev/null +++ b/modules/nf-core/gatk4/preprocessintervals/tests/main.nf.test @@ -0,0 +1,49 @@ + +nextflow_process { + + name "Test Process GATK4_PREPROCESSINTERVALS" + script "../main.nf" + process "GATK4_PREPROCESSINTERVALS" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/preprocessintervals" + + test("test-gatk4-preprocessintervals") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[1] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[2] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) + ] + input[3] = [[],[]] + input[4] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/gatk4/preprocessintervals/tests/main.nf.test.snap b/modules/nf-core/gatk4/preprocessintervals/tests/main.nf.test.snap new file mode 100644 index 0000000..5365291 --- 
/dev/null +++ b/modules/nf-core/gatk4/preprocessintervals/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "test-gatk4-preprocessintervals": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.interval_list:md5,ce14b8fb47a60483fe44473ba40e1583" + ] + ], + "1": [ + "versions.yml:md5,fc3037804d90d3d3424047cfac85d5e4" + ], + "interval_list": [ + [ + { + "id": "test" + }, + "test.interval_list:md5,ce14b8fb47a60483fe44473ba40e1583" + ] + ], + "versions": [ + "versions.yml:md5,fc3037804d90d3d3424047cfac85d5e4" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-26T11:58:51.314382" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/preprocessintervals/tests/nextflow.config b/modules/nf-core/gatk4/preprocessintervals/tests/nextflow.config new file mode 100644 index 0000000..a53707f --- /dev/null +++ b/modules/nf-core/gatk4/preprocessintervals/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GATK4_PREPROCESSINTERVALS { + ext.args = "--padding 0 --interval-merging-rule OVERLAPPING_ONLY" + } +} \ No newline at end of file diff --git a/modules/nf-core/picard/createsequencedictionary/environment.yml b/modules/nf-core/picard/createsequencedictionary/environment.yml index f355462..1d715d5 100644 --- a/modules/nf-core/picard/createsequencedictionary/environment.yml +++ b/modules/nf-core/picard/createsequencedictionary/environment.yml @@ -1,7 +1,5 @@ -name: picard_createsequencedictionary channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::picard=3.1.1 + - bioconda::picard=3.3.0 diff --git a/modules/nf-core/picard/createsequencedictionary/main.nf b/modules/nf-core/picard/createsequencedictionary/main.nf index a1eb8be..49637d1 100644 --- a/modules/nf-core/picard/createsequencedictionary/main.nf +++ b/modules/nf-core/picard/createsequencedictionary/main.nf @@ -4,8 +4,8 @@ process PICARD_CREATESEQUENCEDICTIONARY { conda "${moduleDir}/environment.yml" container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:3.1.1--hdfd78af_0' : - 'biocontainers/picard:3.1.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:3.3.0--hdfd78af_0' : + 'biocontainers/picard:3.3.0--hdfd78af_0' }" input: tuple val(meta), path(fasta) @@ -39,4 +39,16 @@ process PICARD_CREATESEQUENCEDICTIONARY { picard: \$(picard CreateSequenceDictionary --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.dict + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CreateSequenceDictionary --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + } diff --git a/modules/nf-core/picard/createsequencedictionary/meta.yml b/modules/nf-core/picard/createsequencedictionary/meta.yml index 0cb79cf..6761670 100644 --- a/modules/nf-core/picard/createsequencedictionary/meta.yml +++ b/modules/nf-core/picard/createsequencedictionary/meta.yml @@ -12,30 +12,33 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360036712531-CreateSequenceDictionary-Picard- tool_dev_url: https://github.com/broadinstitute/picard licence: ["MIT"] + identifier: biotools:picard_tools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - reference_dict: - type: file - description: picard dictionary file - pattern: "*.{dict}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.dict": + type: file + description: picard dictionary file + pattern: "*.{dict}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@sateeshperi" - "@mjcipriano" diff --git a/modules/nf-core/picard/createsequencedictionary/tests/main.nf.test b/modules/nf-core/picard/createsequencedictionary/tests/main.nf.test new file mode 100644 index 0000000..8152be9 --- /dev/null +++ b/modules/nf-core/picard/createsequencedictionary/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_process { + + name "Test Process PICARD_CREATESEQUENCEDICTIONARY" + script "../main.nf" + process "PICARD_CREATESEQUENCEDICTIONARY" + + tag "modules" + tag "modules_nfcore" + tag "picard" + tag "picard/createsequencedictionary" + + test("sarscov2 - fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.reference_dict[0][1]).name, + process.out.versions + ).match() + } + + ) + } + + } + + test("sarscov2 - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/picard/createsequencedictionary/tests/main.nf.test.snap 
b/modules/nf-core/picard/createsequencedictionary/tests/main.nf.test.snap new file mode 100644 index 0000000..8142f2a --- /dev/null +++ b/modules/nf-core/picard/createsequencedictionary/tests/main.nf.test.snap @@ -0,0 +1,48 @@ +{ + "sarscov2 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.dict:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,3ed186b70c3df55fd4965f48485a01cb" + ], + "reference_dict": [ + [ + { + "id": "test" + }, + "test.dict:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,3ed186b70c3df55fd4965f48485a01cb" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:49:55.602683425" + }, + "sarscov2 - fasta": { + "content": [ + "test.dict", + [ + "versions.yml:md5,3ed186b70c3df55fd4965f48485a01cb" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T10:49:40.396060224" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml index f8450fa..62054fc 100644 --- a/modules/nf-core/samtools/faidx/environment.yml +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -1,10 +1,8 @@ -name: samtools_faidx - +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults - dependencies: - - bioconda::htslib=1.20 - - bioconda::samtools=1.20 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index bdcdbc9..28c0a81 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_FAIDX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : - 'biocontainers/samtools:1.20--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index f3c25de..6721b2c 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -14,47 +14,62 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - fasta: - type: file - description: FASTA file - pattern: "*.{fa,fasta}" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - fai: - type: file - description: FASTA index file - pattern: "*.{fai}" + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - fa: - type: file - description: FASTA file - pattern: "*.{fa}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{fa,fasta}": + type: file + description: FASTA file + pattern: "*.{fa}" - fai: - type: file - description: FASTA index file - pattern: "*.{fai}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.fai": + type: file + description: FASTA index file + pattern: "*.{fai}" - gzi: - type: file - description: Optional gzip index file for compressed inputs - pattern: "*.gzi" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.gzi": + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap index 3223b72..1bbb3ec 100644 --- a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -18,7 +18,7 @@ ], "3": [ - "versions.yml:md5,2db78952923a61e05d50b95518b21856" + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" ], "fa": [ @@ -36,15 +36,15 @@ ], "versions": [ - "versions.yml:md5,2db78952923a61e05d50b95518b21856" + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-05-28T15:42:14.779784761" + "timestamp": "2024-09-16T07:57:47.450887871" }, "test_samtools_faidx_bgzip": { "content": [ @@ -71,7 +71,7 @@ ] ], "3": [ - "versions.yml:md5,2db78952923a61e05d50b95518b21856" + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" ], "fa": [ @@ -95,15 +95,15 @@ ] ], "versions": [ - "versions.yml:md5,2db78952923a61e05d50b95518b21856" + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-05-28T15:42:20.256633877" + "timestamp": 
"2024-09-16T07:58:04.804905659" }, "test_samtools_faidx_fasta": { "content": [ @@ -124,7 +124,7 @@ ], "3": [ - "versions.yml:md5,2db78952923a61e05d50b95518b21856" + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" ], "fa": [ [ @@ -142,15 +142,15 @@ ], "versions": [ - "versions.yml:md5,2db78952923a61e05d50b95518b21856" + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-05-28T15:42:25.632577273" + "timestamp": "2024-09-16T07:58:23.831268154" }, "test_samtools_faidx_stub_fasta": { "content": [ @@ -171,7 +171,7 @@ ], "3": [ - "versions.yml:md5,2db78952923a61e05d50b95518b21856" + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" ], "fa": [ [ @@ -189,15 +189,15 @@ ], "versions": [ - "versions.yml:md5,2db78952923a61e05d50b95518b21856" + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-05-28T15:42:31.058424849" + "timestamp": "2024-09-16T07:58:35.600243706" }, "test_samtools_faidx_stub_fai": { "content": [ @@ -218,7 +218,7 @@ ], "3": [ - "versions.yml:md5,2db78952923a61e05d50b95518b21856" + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" ], "fa": [ @@ -236,14 +236,14 @@ ], "versions": [ - "versions.yml:md5,2db78952923a61e05d50b95518b21856" + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-05-28T15:42:36.479929617" + "timestamp": "2024-09-16T07:58:54.705460167" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml index 260d516..62054fc 100644 --- a/modules/nf-core/samtools/index/environment.yml +++ b/modules/nf-core/samtools/index/environment.yml @@ -1,8 +1,8 @@ -name: 
samtools_index +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::samtools=1.20 - - bioconda::htslib=1.20 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index b523c21..3117561 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_INDEX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : - 'biocontainers/samtools:1.20--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(input) @@ -35,10 +35,11 @@ process SAMTOOLS_INDEX { """ stub: + def args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? "csi" : "bai" """ - touch ${input}.bai - touch ${input}.crai - touch ${input}.csi + touch ${input}.${extension} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml index 01a4ee0..db8df0d 100644 --- a/modules/nf-core/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -15,38 +15,52 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: input file output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - bai: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - crai: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" - csi: - type: file - description: CSI index file - pattern: "*.{csi}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI index file + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.crai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test index bb7756d..ca34fb5 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -9,11 +9,7 @@ nextflow_process { tag "samtools/index" test("bai") { - when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -27,18 +23,13 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out.bai).match("bai") }, - { assert snapshot(process.out.versions).match("bai_versions") } + { assert snapshot(process.out).match() } ) } } test("crai") { - when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -52,20 +43,83 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out.crai).match("crai") }, - { assert snapshot(process.out.versions).match("crai_versions") } + { assert snapshot(process.out).match() } ) } } test("csi") { - config "./csi.nextflow.config" when { - params { - outdir = "$outputDir" + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("bai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', 
single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi - stub") { + options "-stub" + config "./csi.nextflow.config" + + when { process { """ input[0] = Channel.of([ @@ -79,8 +133,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert path(process.out.csi.get(0).get(1)).exists() }, - { assert snapshot(process.out.versions).match("csi_versions") } + { assert snapshot(process.out).match() } ) } } diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap index 52756e8..72d65e8 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -1,74 +1,250 @@ { - "crai_versions": { + "csi - stub": { "content": [ - [ - "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" - ] + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } 
], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-05-28T15:42:04.203740976" + "timestamp": "2024-09-16T08:21:25.261127166" }, - "csi_versions": { + "crai - stub": { "content": [ - [ - "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" - ] + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-05-28T15:42:09.57475878" + "timestamp": "2024-09-16T08:21:12.653194876" }, - "crai": { + "bai - stub": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" ] - ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-12T18:41:38.446424" + "timestamp": "2024-09-16T08:21:01.854932651" }, - "bai": { + "csi": { "content": [ + 
"test.paired_end.sorted.bam.csi", [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" - ] + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-12T18:40:46.579747" + "timestamp": "2024-09-16T08:20:51.485364222" }, - "bai_versions": { + "crai": { "content": [ - [ - "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77" - ] + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:40.518873972" + }, + "bai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-05-28T15:41:57.929287369" + "timestamp": "2024-09-16T08:20:21.184050361" } } \ No newline at end of file From 8b6a098dbd33f4f3bd4166a95542c43d6509a9aa Mon Sep 
17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 4 Nov 2024 20:35:20 +0100 Subject: [PATCH 142/234] update snaps --- tests/gens_pon.nf.test.snap | 12 ++++++------ tests/germlinecnvcaller_cohort.nf.test.snap | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/gens_pon.nf.test.snap b/tests/gens_pon.nf.test.snap index f1364d8..1ecaec1 100644 --- a/tests/gens_pon.nf.test.snap +++ b/tests/gens_pon.nf.test.snap @@ -13,10 +13,10 @@ "gatk4": "4.5.0.0" }, "PICARD_CREATESEQUENCEDICTIONARY": { - "picard": "3.1.1" + "picard": "3.3.0" }, "SAMTOOLS_FAIDX": { - "samtools": 1.2 + "samtools": 1.21 }, "Workflow": { "nf-core/createpanelrefs": "v1.0dev" @@ -53,9 +53,9 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-10-11T14:12:35.334538" + "timestamp": "2024-11-04T19:29:15.733933882" } -} +} \ No newline at end of file diff --git a/tests/germlinecnvcaller_cohort.nf.test.snap b/tests/germlinecnvcaller_cohort.nf.test.snap index bb0d87e..01e3582 100644 --- a/tests/germlinecnvcaller_cohort.nf.test.snap +++ b/tests/germlinecnvcaller_cohort.nf.test.snap @@ -25,10 +25,10 @@ "gatk4": "4.5.0.0" }, "PICARD_CREATESEQUENCEDICTIONARY": { - "picard": "3.1.1" + "picard": "3.3.0" }, "SAMTOOLS_FAIDX": { - "samtools": 1.2 + "samtools": 1.21 }, "Workflow": { "nf-core/createpanelrefs": "v1.0dev" @@ -184,9 +184,9 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-10-11T16:00:18.603619" + "timestamp": "2024-11-04T19:45:08.802551855" } -} +} \ No newline at end of file From 2de9999dc3146eb36f9d7f7cf92876fd00cf756f Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 12 Nov 2024 10:00:01 +0100 Subject: [PATCH 143/234] switch to gatk4 sequencedictionary --- conf/modules/gens_pon.config | 2 +- 
conf/modules/germlinecnvcaller_cohort.config | 2 +- modules.json | 10 +-- .../createsequencedictionary/environment.yml | 2 +- .../gatk4/createsequencedictionary/main.nf | 52 ++++++++++++++ .../gatk4/createsequencedictionary/meta.yml | 49 +++++++++++++ .../tests/main.nf.test | 27 +++----- .../tests/main.nf.test.snap | 68 +++++++++++++++++++ .../createsequencedictionary/tests/tags.yml | 2 + .../picard/createsequencedictionary/main.nf | 54 --------------- .../picard/createsequencedictionary/meta.yml | 49 ------------- .../tests/main.nf.test.snap | 48 ------------- nextflow_schema.json | 2 +- subworkflows/local/gens_pon/main.nf | 8 +-- .../local/germlinecnvcaller_cohort/main.nf | 8 +-- 15 files changed, 199 insertions(+), 184 deletions(-) rename modules/nf-core/{picard => gatk4}/createsequencedictionary/environment.yml (65%) create mode 100644 modules/nf-core/gatk4/createsequencedictionary/main.nf create mode 100644 modules/nf-core/gatk4/createsequencedictionary/meta.yml rename modules/nf-core/{picard => gatk4}/createsequencedictionary/tests/main.nf.test (56%) create mode 100644 modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap create mode 100644 modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml delete mode 100644 modules/nf-core/picard/createsequencedictionary/main.nf delete mode 100644 modules/nf-core/picard/createsequencedictionary/meta.yml delete mode 100644 modules/nf-core/picard/createsequencedictionary/tests/main.nf.test.snap diff --git a/conf/modules/gens_pon.config b/conf/modules/gens_pon.config index f64835e..3c10be7 100644 --- a/conf/modules/gens_pon.config +++ b/conf/modules/gens_pon.config @@ -27,7 +27,7 @@ process { ] } - withName: '.*GENS_PON:PICARD_CREATESEQUENCEDICTIONARY' { + withName: '.*GENS_PON:GATK4_CREATESEQUENCEDICTIONARY' { ext.when = { params.dict.equals(null) } publishDir = [ mode: params.publish_dir_mode, diff --git a/conf/modules/germlinecnvcaller_cohort.config b/conf/modules/germlinecnvcaller_cohort.config 
index d4056f9..ea074a8 100644 --- a/conf/modules/germlinecnvcaller_cohort.config +++ b/conf/modules/germlinecnvcaller_cohort.config @@ -27,7 +27,7 @@ process { ] } - withName: '.*GERMLINECNVCALLER_COHORT:PICARD_CREATESEQUENCEDICTIONARY' { + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_CREATESEQUENCEDICTIONARY' { ext.when = { params.dict.equals(null) } publishDir = [ mode: params.publish_dir_mode, diff --git a/modules.json b/modules.json index 143d68a..87e2403 100644 --- a/modules.json +++ b/modules.json @@ -30,6 +30,11 @@ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, + "gatk4/createsequencedictionary": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "gatk4/createsomaticpanelofnormals": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", @@ -80,11 +85,6 @@ "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", "installed_by": ["modules"] }, - "picard/createsequencedictionary": { - "branch": "master", - "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", - "installed_by": ["modules"] - }, "samtools/faidx": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", diff --git a/modules/nf-core/picard/createsequencedictionary/environment.yml b/modules/nf-core/gatk4/createsequencedictionary/environment.yml similarity index 65% rename from modules/nf-core/picard/createsequencedictionary/environment.yml rename to modules/nf-core/gatk4/createsequencedictionary/environment.yml index 1d715d5..55993f4 100644 --- a/modules/nf-core/picard/createsequencedictionary/environment.yml +++ b/modules/nf-core/gatk4/createsequencedictionary/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::picard=3.3.0 + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/createsequencedictionary/main.nf b/modules/nf-core/gatk4/createsequencedictionary/main.nf new file mode 100644 index 
0000000..c7f1d75 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/main.nf @@ -0,0 +1,52 @@ +process GATK4_CREATESEQUENCEDICTIONARY { + tag "$fasta" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('*.dict') , emit: dict + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def avail_mem = 6144 + if (!task.memory) { + log.info '[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CreateSequenceDictionary \\ + --REFERENCE $fasta \\ + --URI $fasta \\ + --TMP_DIR . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta.baseName}.dict + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/gatk4/createsequencedictionary/meta.yml new file mode 100644 index 0000000..7b5156b --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/meta.yml @@ -0,0 +1,49 @@ +name: gatk4_createsequencedictionary +description: Creates a sequence dictionary for a reference sequence +keywords: + - createsequencedictionary + - dictionary + - fasta + - gatk4 +tools: + - gatk: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: Input fasta file + pattern: "*.{fasta,fa}" +output: + - dict: + - meta: + type: map + description: | + Groovy Map containing reference information + - "*.dict": + type: file + description: gatk dictionary file + pattern: "*.{dict}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" + - "@ramprasadn" +maintainers: + - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/picard/createsequencedictionary/tests/main.nf.test b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test similarity index 56% rename from modules/nf-core/picard/createsequencedictionary/tests/main.nf.test rename to modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test index 8152be9..a8a9c6d 100644 --- a/modules/nf-core/picard/createsequencedictionary/tests/main.nf.test +++ b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test @@ -1,22 +1,22 @@ nextflow_process { - name "Test Process PICARD_CREATESEQUENCEDICTIONARY" + name "Test Process GATK4_CREATESEQUENCEDICTIONARY" script "../main.nf" - process "PICARD_CREATESEQUENCEDICTIONARY" + process "GATK4_CREATESEQUENCEDICTIONARY" tag "modules" tag "modules_nfcore" - tag "picard" - tag "picard/createsequencedictionary" + tag "gatk4" + tag "gatk4/createsequencedictionary" test("sarscov2 - fasta") { when { process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] """ } } @@ -24,12 +24,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.reference_dict[0][1]).name, - process.out.versions - ).match() - } - + { assert snapshot(process.out).match() } ) } @@ 
-42,9 
+37,9 @@ nextflow_process { when { process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] """ } } diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap new file mode 100644 index 0000000..16735f9 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "sarscov2 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.dict:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6" + ], + "dict": [ + [ + { + "id": "test" + }, + "genome.dict:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-05-16T10:16:16.34453" + }, + "sarscov2 - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.dict:md5,7362679f176e0f52add03c08f457f646" + ] + ], + "1": [ + "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6" + ], + "dict": [ + [ + { + "id": "test" + }, + "genome.dict:md5,7362679f176e0f52add03c08f457f646" + ] + ], + "versions": [ + "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-05-16T13:58:25.822068" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml b/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml new file mode 100644 index 0000000..035c5e4 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml @@ -0,0 +1,2 
@@ +gatk4/createsequencedictionary: + - "modules/nf-core/gatk4/createsequencedictionary/**" diff --git a/modules/nf-core/picard/createsequencedictionary/main.nf b/modules/nf-core/picard/createsequencedictionary/main.nf deleted file mode 100644 index 49637d1..0000000 --- a/modules/nf-core/picard/createsequencedictionary/main.nf +++ /dev/null @@ -1,54 +0,0 @@ -process PICARD_CREATESEQUENCEDICTIONARY { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:3.3.0--hdfd78af_0' : - 'biocontainers/picard:3.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path("*.dict"), emit: reference_dict - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def avail_mem = 3072 - if (!task.memory) { - log.info '[Picard CreateSequenceDictionary] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
- } else { - avail_mem = (task.memory.mega*0.8).intValue() - } - """ - picard \\ - -Xmx${avail_mem}M \\ - CreateSequenceDictionary \\ - $args \\ - --REFERENCE $fasta \\ - --OUTPUT ${prefix}.dict - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - picard: \$(picard CreateSequenceDictionary --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.dict - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - picard: \$(echo \$(picard CreateSequenceDictionary --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) - END_VERSIONS - """ - -} diff --git a/modules/nf-core/picard/createsequencedictionary/meta.yml b/modules/nf-core/picard/createsequencedictionary/meta.yml deleted file mode 100644 index 6761670..0000000 --- a/modules/nf-core/picard/createsequencedictionary/meta.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: picard_createsequencedictionary -description: Creates a sequence dictionary for a reference sequence. -keywords: - - sequence - - dictionary - - picard -tools: - - picard: - description: | - Creates a sequence dictionary file (with ".dict" extension) from a reference sequence provided in FASTA format, which is required by many processing and analysis tools. The output file contains a header but no SAMRecords, and the header contains only sequence records. - homepage: https://broadinstitute.github.io/picard/ - documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360036712531-CreateSequenceDictionary-Picard- - tool_dev_url: https://github.com/broadinstitute/picard - licence: ["MIT"] - identifier: biotools:picard_tools -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" -output: - - reference_dict: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*.dict": - type: file - description: picard dictionary file - pattern: "*.{dict}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@sateeshperi" - - "@mjcipriano" - - "@hseabolt" -maintainers: - - "@sateeshperi" - - "@mjcipriano" - - "@hseabolt" diff --git a/modules/nf-core/picard/createsequencedictionary/tests/main.nf.test.snap b/modules/nf-core/picard/createsequencedictionary/tests/main.nf.test.snap deleted file mode 100644 index 8142f2a..0000000 --- a/modules/nf-core/picard/createsequencedictionary/tests/main.nf.test.snap +++ /dev/null @@ -1,48 +0,0 @@ -{ - "sarscov2 - fasta - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.dict:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - "versions.yml:md5,3ed186b70c3df55fd4965f48485a01cb" - ], - "reference_dict": [ - [ - { - "id": "test" - }, - "test.dict:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,3ed186b70c3df55fd4965f48485a01cb" - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-10-18T10:49:55.602683425" - }, - "sarscov2 - fasta": { - "content": [ - "test.dict", - [ - "versions.yml:md5,3ed186b70c3df55fd4965f48485a01cb" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" - }, - "timestamp": "2024-10-18T10:49:40.396060224" - } -} \ No newline at end of file diff --git a/nextflow_schema.json b/nextflow_schema.json index 1532230..24784c7 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -84,7 +84,7 @@ "description": "When scattering with this argument, each of the resultant files will (ideally) have this amount of 
interval-counts.", "default": 5000, "fa_icon": "fas fa-sort-numeric-down", - "help_text": "Used by GATK/Picards's IntervalListTools." + "help_text": "Used by GATK's IntervalListTools." }, "gcnv_segmental_duplications": { "type": "string", diff --git a/subworkflows/local/gens_pon/main.nf b/subworkflows/local/gens_pon/main.nf index f9782a4..b82e2ba 100644 --- a/subworkflows/local/gens_pon/main.nf +++ b/subworkflows/local/gens_pon/main.nf @@ -1,7 +1,7 @@ include { GATK4_COLLECTREADCOUNTS } from '../../../modules/nf-core/gatk4/collectreadcounts/main' include { GATK4_CREATEREADCOUNTPANELOFNORMALS } from '../../../modules/nf-core/gatk4/createreadcountpanelofnormals/main' +include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary/main' include { GATK4_PREPROCESSINTERVALS } from '../../../modules/nf-core/gatk4/preprocessintervals/main' -include { PICARD_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/picard/createsequencedictionary/main' include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' @@ -21,10 +21,10 @@ workflow GENS_PON { // SAMTOOLS_FAIDX ( ch_fasta, [[:],[]] ) - PICARD_CREATESEQUENCEDICTIONARY ( ch_fasta ) + GATK4_CREATESEQUENCEDICTIONARY ( ch_fasta ) ch_user_dict - .mix(PICARD_CREATESEQUENCEDICTIONARY.out.reference_dict) + .mix(GATK4_CREATESEQUENCEDICTIONARY.out.dict) .collect() .set { ch_dict } @@ -79,8 +79,8 @@ workflow GENS_PON { ch_versions = ch_versions.mix(GATK4_COLLECTREADCOUNTS.out.versions.first()) ch_versions = ch_versions.mix(GATK4_CREATEREADCOUNTPANELOFNORMALS.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) ch_versions = ch_versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) - ch_versions = ch_versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) ch_versions = 
ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) diff --git a/subworkflows/local/germlinecnvcaller_cohort/main.nf b/subworkflows/local/germlinecnvcaller_cohort/main.nf index ce0fa00..df8e869 100644 --- a/subworkflows/local/germlinecnvcaller_cohort/main.nf +++ b/subworkflows/local/germlinecnvcaller_cohort/main.nf @@ -2,6 +2,7 @@ include { GATK4_ANNOTATEINTERVALS } from ' include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_TARGETS } from '../../../modules/nf-core/gatk4/bedtointervallist/main' include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_EXCLUDE } from '../../../modules/nf-core/gatk4/bedtointervallist/main' include { GATK4_COLLECTREADCOUNTS } from '../../../modules/nf-core/gatk4/collectreadcounts/main' +include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary/main' include { GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../../modules/nf-core/gatk4/determinegermlinecontigploidy/main' include { GATK4_FILTERINTERVALS } from '../../../modules/nf-core/gatk4/filterintervals/main' include { GATK4_GERMLINECNVCALLER } from '../../../modules/nf-core/gatk4/germlinecnvcaller/main' @@ -9,7 +10,6 @@ include { GATK4_INDEXFEATUREFILE as GATK4_INDEXFEATUREFILE_MAPPABILITY } from ' include { GATK4_INDEXFEATUREFILE as GATK4_INDEXFEATUREFILE_SEGDUP } from '../../../modules/nf-core/gatk4/indexfeaturefile/main' include { GATK4_INTERVALLISTTOOLS } from '../../../modules/nf-core/gatk4/intervallisttools/main' include { GATK4_PREPROCESSINTERVALS } from '../../../modules/nf-core/gatk4/preprocessintervals/main' -include { PICARD_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/picard/createsequencedictionary/main' include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' @@ -36,14 +36,14 @@ workflow GERMLINECNVCALLER_COHORT { // SAMTOOLS_FAIDX ( ch_fasta, [[:],[]] ) - PICARD_CREATESEQUENCEDICTIONARY ( ch_fasta ) 
+ GATK4_CREATESEQUENCEDICTIONARY ( ch_fasta ) GATK4_INDEXFEATUREFILE_MAPPABILITY ( ch_mappable_regions ) GATK4_INDEXFEATUREFILE_SEGDUP ( ch_segmental_duplications ) ch_user_dict - .mix(PICARD_CREATESEQUENCEDICTIONARY.out.reference_dict) + .mix(GATK4_CREATESEQUENCEDICTIONARY.out.dict) .collect() .set { ch_dict } @@ -168,7 +168,7 @@ workflow GERMLINECNVCALLER_COHORT { GATK4_GERMLINECNVCALLER ( ch_cnvcaller_in ) ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) - ch_versions = ch_versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) + ch_versions = ch_versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) ch_versions = ch_versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST_TARGETS.out.versions) From f725863f04dd1e4ee47cadf29c1ba2284f0dde22 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 12 Nov 2024 10:29:26 +0100 Subject: [PATCH 144/234] update snaps --- tests/gens_pon.nf.test.snap | 8 ++++---- tests/germlinecnvcaller_cohort.nf.test.snap | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/gens_pon.nf.test.snap b/tests/gens_pon.nf.test.snap index 1ecaec1..0dd17fe 100644 --- a/tests/gens_pon.nf.test.snap +++ b/tests/gens_pon.nf.test.snap @@ -9,11 +9,11 @@ "GATK4_CREATEREADCOUNTPANELOFNORMALS": { "gatk4": "4.5.0.0" }, - "GATK4_PREPROCESSINTERVALS": { + "GATK4_CREATESEQUENCEDICTIONARY": { "gatk4": "4.5.0.0" }, - "PICARD_CREATESEQUENCEDICTIONARY": { - "picard": "3.3.0" + "GATK4_PREPROCESSINTERVALS": { + "gatk4": "4.5.0.0" }, "SAMTOOLS_FAIDX": { "samtools": 1.21 @@ -56,6 +56,6 @@ "nf-test": "0.9.1", "nextflow": "24.10.0" }, - "timestamp": "2024-11-04T19:29:15.733933882" + "timestamp": "2024-11-12T10:09:54.111724435" } } \ No newline at end of file diff --git a/tests/germlinecnvcaller_cohort.nf.test.snap b/tests/germlinecnvcaller_cohort.nf.test.snap 
index 01e3582..e603a3e 100644 --- a/tests/germlinecnvcaller_cohort.nf.test.snap +++ b/tests/germlinecnvcaller_cohort.nf.test.snap @@ -9,6 +9,9 @@ "GATK4_COLLECTREADCOUNTS": { "gatk4": "4.5.0.0" }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.5.0.0" + }, "GATK4_DETERMINEGERMLINECONTIGPLOIDY": { "gatk4": "4.5.0.0" }, @@ -24,9 +27,6 @@ "GATK4_PREPROCESSINTERVALS": { "gatk4": "4.5.0.0" }, - "PICARD_CREATESEQUENCEDICTIONARY": { - "picard": "3.3.0" - }, "SAMTOOLS_FAIDX": { "samtools": 1.21 }, @@ -187,6 +187,6 @@ "nf-test": "0.9.1", "nextflow": "24.10.0" }, - "timestamp": "2024-11-04T19:45:08.802551855" + "timestamp": "2024-11-12T10:28:53.009826572" } } \ No newline at end of file From af3cbe0b9d1beba2b61dca4fb37e3aa712bc9dcd Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 28 Mar 2025 21:22:52 +0100 Subject: [PATCH 145/234] update actions cache --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e34b245..6cd96e2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,7 +63,7 @@ jobs: version: ${{ env.NFT_VER }} - name: Cache pdiff - uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4 + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4 id: cache-pip-pdiff with: path: ~/.cache/pip From 1f68999d56bb47647ddb2d09bcfe6b2f409cab52 Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 29 Mar 2025 10:28:54 +0100 Subject: [PATCH 146/234] update test config --- nf-test.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nf-test.config b/nf-test.config index 69ef731..d5889c0 100644 --- a/nf-test.config +++ b/nf-test.config @@ -8,6 +8,9 @@ config { // location of an optional nextflow.config file specific for executing tests configFile "conf/test.config" + // ignore tests coming from the nf-core/modules repo + ignore 
'modules/nf-core/**/*', 'subworkflows/nf-core/**/*' + // run all test with defined profile(s) from the main nextflow.config profile "test" From 665ee06ff18a0ab0aaaadae3b4e587eae6d3b90b Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Mon, 31 Mar 2025 13:29:57 +0200 Subject: [PATCH 147/234] Update nextflow.config --- nextflow.config | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/nextflow.config b/nextflow.config index a28e061..aa3e8e6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -253,12 +253,12 @@ manifest { contributors = [ // TODO nf-core: Update the field with the details of the contributors to your pipeline. New with Nextflow version 24.10.0 [ - name: '@maxulysse', - affiliation: '', - email: '', - github: '', - contribution: [], // List of contribution types ('author', 'maintainer' or 'contributor') - orcid: '' + name: 'Maxime U Garcia', + affiliation: 'Seqera', + email: 'maxime.garcia@seqera.io', + github: '@maxulysse', + contribution: ['author'], + orcid: '0000-0003-2827-9261' ], ] homePage = 'https://github.com/nf-core/createpanelrefs' From ec8f3c4ed35b82229be5e9af898caed3ce38e38f Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 31 Mar 2025 13:30:48 +0200 Subject: [PATCH 148/234] Update nextflow.config --- nextflow.config | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nextflow.config b/nextflow.config index aa3e8e6..5c215f5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -260,6 +260,14 @@ manifest { contribution: ['author'], orcid: '0000-0003-2827-9261' ], + [ + name: 'Ramprasad Neethiraj', + affiliation: 'School of Engineering sciences in Chemistry, Biotechnology and Health, KTH Royal Institute of Technology, Stockholm, Sweden; Science for Life Laboratory, Department of Microbiology, Tumour and Cell Biology, Karolinska Institutet, Stockholm, Sweden', + email: 'rne@kth.se', + github: 'https://github.com/ramprasadn', + contribution: 
['author'], + orcid: 'https://orcid.org/0000-0001-7313-3734' + ] ] homePage = 'https://github.com/nf-core/createpanelrefs' description = """Generate Panel of Normals, models or other similar references from lots of samples""" From dacb59533e32f91bc7836e655f7818fec26e402a Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 8 Apr 2025 16:17:59 +0200 Subject: [PATCH 149/234] cnvkit with cram files --- conf/modules/cnvkit.config | 4 + conf/test.config | 2 +- modules.json | 5 + modules/nf-core/samtools/view/environment.yml | 9 + modules/nf-core/samtools/view/main.nf | 103 ++ modules/nf-core/samtools/view/meta.yml | 145 +++ .../nf-core/samtools/view/tests/bam.config | 3 + .../samtools/view/tests/bam_index.config | 3 + .../samtools/view/tests/cram_index.config | 3 + .../nf-core/samtools/view/tests/main.nf.test | 463 +++++++++ .../samtools/view/tests/main.nf.test.snap | 972 ++++++++++++++++++ tests/csv/1.0.0/bam_cram.csv | 5 + tests/csv/1.0.0/cram.csv | 3 + tests/default.nf.test.snap | 31 +- workflows/createpanelrefs.nf | 21 +- 15 files changed, 1759 insertions(+), 13 deletions(-) create mode 100644 modules/nf-core/samtools/view/environment.yml create mode 100644 modules/nf-core/samtools/view/main.nf create mode 100644 modules/nf-core/samtools/view/meta.yml create mode 100644 modules/nf-core/samtools/view/tests/bam.config create mode 100644 modules/nf-core/samtools/view/tests/bam_index.config create mode 100644 modules/nf-core/samtools/view/tests/cram_index.config create mode 100644 modules/nf-core/samtools/view/tests/main.nf.test create mode 100644 modules/nf-core/samtools/view/tests/main.nf.test.snap create mode 100644 tests/csv/1.0.0/bam_cram.csv create mode 100644 tests/csv/1.0.0/cram.csv diff --git a/conf/modules/cnvkit.config b/conf/modules/cnvkit.config index 0e29261..74ae34c 100644 --- a/conf/modules/cnvkit.config +++ b/conf/modules/cnvkit.config @@ -12,6 +12,10 @@ process { + withName: SAMTOOLS_VIEW { + 
ext.args = {"--output-fmt bam"} + } + withName: CNVKIT_BATCH { ext.args = {"--method wgs --output-reference ${meta.id}.cnn"} publishDir = [ diff --git a/conf/test.config b/conf/test.config index 0d3f9fa..fab55ad 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,7 +23,7 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - input = "${projectDir}/tests/csv/1.0.0/bam.csv" + input = "${projectDir}/tests/csv/1.0.0/bam_cram.csv" // Main options tools = 'cnvkit' diff --git a/modules.json b/modules.json index f860291..95a8743 100644 --- a/modules.json +++ b/modules.json @@ -94,6 +94,11 @@ "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", "installed_by": ["modules"] + }, + "samtools/view": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] } } }, diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml new file mode 100644 index 0000000..8cae571 --- /dev/null +++ b/modules/nf-core/samtools/view/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf new file mode 100644 index 0000000..f43a4c6 --- /dev/null +++ b/modules/nf-core/samtools/view/main.nf @@ -0,0 +1,103 @@ +process SAMTOOLS_VIEW { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(index) + tuple val(meta2), path(fasta) + path qname + val index_format + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true + tuple val(meta), path("${prefix}.${file_type}.bai"), emit: bai, optional: true + tuple val(meta), path("${prefix}.${file_type}.csi"), emit: csi, optional: true + tuple val(meta), path("${prefix}.${file_type}.crai"), emit: crai, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}"), emit: unselected, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}.{csi,crai}"), emit: unselected_index, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + + output_file = index_format ? "${prefix}.${file_type}##idx##${prefix}.${file_type}.${index_format} --write-index" : "${prefix}.${file_type}" + // Can't choose index type of unselected file + readnames = qname ? "--qname-file ${qname} --output-unselected ${prefix}.unselected.${file_type}": "" + + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + if (index_format) { + if (!index_format.matches('bai|csi|crai')) { + error "Index format not one of bai, csi, crai." 
+ } else if (file_type == "sam") { + error "Indexing not compatible with SAM output" + } + } + """ + samtools \\ + view \\ + --threads ${task.cpus-1} \\ + ${reference} \\ + ${readnames} \\ + $args \\ + -o ${output_file} \\ + $input \\ + $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + default_index_format = + file_type == "bam" ? "csi" : + file_type == "cram" ? "crai" : "" + index = index_format ? "touch ${prefix}.${file_type}.${index_format}" : args.contains("--write-index") ? "touch ${prefix}.${file_type}.${default_index_format}" : "" + unselected = qname ? "touch ${prefix}.unselected.${file_type}" : "" + // Can't choose index type of unselected file + unselected_index = qname && (args.contains("--write-index") || index_format) ? "touch ${prefix}.unselected.${file_type}.${default_index_format}" : "" + + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + if (index_format) { + if (!index_format.matches('bai|csi|crai')) { + error "Index format not one of bai, csi, crai." + } else if (file_type == "sam") { + error "Indexing not compatible with SAM output." 
+ } + } + """ + touch ${prefix}.${file_type} + ${index} + ${unselected} + ${unselected_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml new file mode 100644 index 0000000..28c268a --- /dev/null +++ b/modules/nf-core/samtools/view/meta.yml @@ -0,0 +1,145 @@ +name: samtools_view +description: filter/convert SAM/BAM/CRAM file +keywords: + - view + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index: + type: file + description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) + pattern: "*.{.bai,.csi,.crai}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - - qname: + type: file + description: Optional file with read names to output only select alignments + pattern: "*.{txt,list}" + - - index_format: + type: string + description: Index format, used together with ext.args = '--write-index' + pattern: "bai|csi|crai" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: optional filtered/converted BAM file + pattern: "*.{bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: optional filtered/converted CRAM file + pattern: "*.{cram}" + - sam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.sam: + type: file + description: optional filtered/converted SAM file + pattern: "*.{sam}" + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.bai: + type: file + description: optional BAM file index + pattern: "*.{bai}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.csi: + type: file + description: optional tabix BAM file index + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.crai: + type: file + description: optional CRAM file index + pattern: "*.{crai}" + - unselected: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}: + type: file + description: optional file with unselected alignments + pattern: "*.unselected.{bam,cram,sam}" + - unselected_index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}.{csi,crai}: + type: file + description: index for the "unselected" file + pattern: "*.unselected.{csi,crai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" +maintainers: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/samtools/view/tests/bam.config b/modules/nf-core/samtools/view/tests/bam.config new file mode 100644 index 0000000..c10d108 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/bam_index.config b/modules/nf-core/samtools/view/tests/bam_index.config new file mode 100644 index 0000000..771ae03 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam_index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam --write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/cram_index.config b/modules/nf-core/samtools/view/tests/cram_index.config new file mode 100644 index 0000000..ed87c33 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/cram_index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt cram --write-index" +} diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test new file mode 100644 index 0000000..d8551dd --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test @@ -0,0 +1,463 @@ +nextflow_process { + + name "Test Process SAMTOOLS_VIEW" + script "../main.nf" + process "SAMTOOLS_VIEW" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/view" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], 
// meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.bai).match("bam_bai") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.sam).match("bam_sam") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("bam_csi_index") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + input[3] = 'csi' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + process.out.versions).match() + } + ) + } + } + + test("bam_bai_index") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + input[3] = 'bai' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.bai[0][1]).name, + process.out.versions).match() } + ) + } + } + + test("bam_bai_index_unselected") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map 
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = Channel.of('testN:1') + .collectFile(name: 'selected_reads.txt') + input[3] = 'bai' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.bai[0][1]).name, + file(process.out.unselected[0][1]).name, + file(process.out.unselected_index[0][1]).name, + process.out.versions).match() + } + ) + } + } + + test("cram_crai_index_unselected") { + + config "./cram_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = Channel.of('testN:1') + .collectFile(name: 'selected_reads.txt') + input[3] = 'crai' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + file(process.out.crai[0][1]).name, + file(process.out.unselected[0][1]).name, + file(process.out.unselected_index[0][1]).name, + process.out.versions).match() + } + ) + } + } + + test("cram") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(file(process.out.cram[0][1]).name).match("cram_cram") }, + { assert snapshot(process.out.bai).match("cram_bai") }, + { assert snapshot(process.out.bam).match("cram_bam") }, + { assert snapshot(process.out.crai).match("cram_crai") }, + { assert snapshot(process.out.csi).match("cram_csi") }, + { assert snapshot(process.out.sam).match("cram_sam") }, + { assert snapshot(process.out.versions).match("cram_versions") } + ) + } + } + + test("cram_to_bam") { + + config "./bam.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_bam") }, + { assert snapshot(process.out.bai).match("cram_to_bam_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_cram") }, + { assert snapshot(process.out.csi).match("cram_to_bam_csi") }, + { assert snapshot(process.out.sam).match("cram_to_bam_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_versions") } + ) + } + } + + test("cram_to_bam_index") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + 
input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_versions") } + ) + } + } + + test("cram_to_bam_index_qname") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of("testN:2817", "testN:2814").collectFile(name: "readnames.list", newLine: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_qname_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_qname_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_qname_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_qname_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_qname_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_qname_sam") }, + { assert snapshot(file(process.out.unselected[0][1]).name).match("cram_to_bam_index_qname_unselected") }, + { assert 
snapshot(file(process.out.unselected_index[0][1]).name).match("cram_to_bam_index_qname_unselected_csi") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_qname_versions") } + ) + } + } + + test("bam_stub") { + + options "-stub" + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bam_stub_csi") }, + { assert snapshot(process.out.bai).match("bam_stub_bai") }, + { assert snapshot(process.out.crai).match("bam_stub_crai") }, + { assert snapshot(process.out.cram).match("bam_stub_cram") }, + { assert snapshot(process.out.sam).match("bam_stub_sam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } + ) + } + } + + test("bam_csi_index - stub") { + + options "-stub" + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + input[3] = 'csi' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("bam_bai_index - stub") { + + options "-stub" + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + input[3] = 
'bai' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("bam_bai_index_uselected - stub") { + + options "-stub" + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = Channel.of('testN:1') + .collectFile(name: 'selected_reads.txt') + input[3] = 'bai' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("cram_crai_index_unselected - stub") { + + options "-stub" + config "./cram_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = Channel.of('testN:1') + .collectFile(name: 'selected_reads.txt') + input[3] = 'crai' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap b/modules/nf-core/samtools/view/tests/main.nf.test.snap new file mode 100644 index 0000000..1cb793f --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap @@ -0,0 +1,972 @@ +{ + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.256068" + }, + "bam_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.065301" + }, + "bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": 
"2024-02-12T19:37:51.258578" + }, + "bam_stub_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.071284" + }, + "bam_stub_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-02-14T07:43:43.6526401" + }, + "cram_to_bam_index_qname_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.325496" + }, + "cram_to_bam_index_qname_unselected_csi": { + "content": [ + "test.unselected.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.262882" + }, + "cram_to_bam_index_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.95456" + }, + "cram_to_bam_index_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:25:14.475388399" + }, + "bam_csi_index": { + "content": [ + "test.bam", + "test.bam.csi", + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-02-14T07:45:19.718077276" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-02-13T16:13:00.739468586" + }, + "cram_crai_index_unselected - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.unselected.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.unselected.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ], + "bai": [ + + ], + "bam": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + [ + { + "id": "test", + "single_end": false + }, + "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "sam": [ + + ], + "unselected": [ + [ + { + "id": "test", + "single_end": false + }, + "test.unselected.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "unselected_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.unselected.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-02-14T07:47:20.903462221" + }, + "bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.259774" + }, + "bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.261287" + }, + "cram_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.502625" + }, + "cram_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.2", + 
"nextflow": "24.10.3" + }, + "timestamp": "2025-02-13T16:33:28.319991831" + }, + "cram_to_bam_index_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.962863" + }, + "cram_to_bam_index_qname_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.337634" + }, + "bam_csi_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ], + "bai": [ + + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + + ], + "unselected": [ + + ], + "unselected_index": [ + + ], + "versions": [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-02-14T07:46:52.477256747" + }, + "cram_to_bam_index_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.958617" + }, + "bam_bai_index": { + "content": [ + "test.bam", + "test.bam.bai", + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-02-14T07:45:29.205677197" + }, + "cram_to_bam_index_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" 
+ }, + "timestamp": "2024-02-12T19:38:12.972288" + }, + "bam_bai_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + + ], + "sam": [ + + ], + "unselected": [ + + ], + "unselected_index": [ + + ], + "versions": [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-02-14T07:51:10.220507926" + }, + "cram_to_bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.999247" + }, + "cram_to_bam_index_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.976457" + }, + "cram_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.497581" + }, + "cram_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.50038" + }, + "cram_to_bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.992239" + }, + "bam_stub_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": 
"23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.079529" + }, + "cram_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.490286" + }, + "cram_to_bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.989247" + }, + "cram_to_bam_index_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.967681" + }, + "cram_crai_index_unselected": { + "content": [ + "test.cram", + "test.cram.crai", + "test.unselected.cram", + "test.unselected.cram.crai", + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-02-14T07:45:48.461930073" + }, + "cram_to_bam_index_qname_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:25:51.953436682" + }, + "cram_to_bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.982361" + }, + "cram_to_bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.98601" + }, + "cram_to_bam_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-02-13T16:33:39.363718229" + }, + "cram_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.495512" + }, + "bam_stub_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.076908" + 
}, + "cram_to_bam_index_qname_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "cram_to_bam_index_qname_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.330789" + }, + "cram_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.493129" + }, + "bam_stub_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.074313" + }, + "cram_to_bam_index_qname_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "bam_bai_index_unselected": { + "content": [ + "test.bam", + "test.bam.bai", + "test.unselected.bam", + "test.unselected.bam.csi", + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-02-14T07:45:38.993014707" + }, + "cram_to_bam_index_qname_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.333248" + }, + "cram_to_bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.995454" + }, + "bam_bai_index_uselected - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.unselected.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.unselected.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + + ], + "sam": [ + + ], + "unselected": [ + [ + { + "id": "test", + "single_end": false + }, + "test.unselected.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "unselected_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.unselected.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-02-14T07:51:24.947216832" + }, + "cram_to_bam_index_qname_unselected": { + "content": [ + "test.unselected.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.264651" + }, + "bam_stub_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.068596" + } +} \ No newline at end of file diff --git a/tests/csv/1.0.0/bam_cram.csv b/tests/csv/1.0.0/bam_cram.csv new file mode 100644 index 0000000..a10b04c --- /dev/null +++ b/tests/csv/1.0.0/bam_cram.csv @@ -0,0 +1,5 @@ +sample,bam,cram +sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam 
+sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam +sample3,,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram +sample4,,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram diff --git a/tests/csv/1.0.0/cram.csv b/tests/csv/1.0.0/cram.csv new file mode 100644 index 0000000..67a3fd1 --- /dev/null +++ b/tests/csv/1.0.0/cram.csv @@ -0,0 +1,3 @@ +sample,cram +sample3,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram +sample4,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 6ca813f..da7cb82 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,7 +1,7 @@ { "Run with profile test | default aka cnvkit": { "content": [ - 2, + 4, { "CNVKIT_BATCH": { "cnvkit": "0.9.10" @@ -24,24 +24,37 @@ "reference", "reference/cnvkit", "reference/cnvkit/panel.cnn", + "reference/cnvkit/sample3.antitargetcoverage.cnn", + "reference/cnvkit/sample3.targetcoverage.cnn", + "reference/cnvkit/sample4.antitargetcoverage.cnn", + "reference/cnvkit/sample4.targetcoverage.cnn", "reference/cnvkit/test.paired_end.recalibrated.sorted.antitargetcoverage.cnn", "reference/cnvkit/test.paired_end.recalibrated.sorted.targetcoverage.cnn", "reference/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn", - "reference/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn" + "reference/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn", + "samtools", + "samtools/sample3.bam", + 
"samtools/sample4.bam" ], [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "panel.cnn:md5,68028cd2b4e0fc4489bf5bfd0a73440f", + "panel.cnn:md5,1443acdb3bb430b0c144ec100ef8a514", + "sample3.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "sample3.targetcoverage.cnn:md5,814200aceed64f3e0c4a69dab64553c4", + "sample4.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "sample4.targetcoverage.cnn:md5,ae3bfc49096f86e48c37bc9b997982fb", "test.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", - "test.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,0067cc3a0e479b23ab3bf056cead31b4", + "test.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,814200aceed64f3e0c4a69dab64553c4", "test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", - "test2.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,1e1012812eb893afd931485cb760294e" + "test2.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,ae3bfc49096f86e48c37bc9b997982fb", + "sample3.bam:md5,28d7b627bcc8a220253ce2a950b18b56", + "sample4.bam:md5,d221a5042fd4cdec4fcb91e89e8bb92e" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-10-11T11:10:55.4912" + "timestamp": "2025-04-08T16:03:23.731350516" } -} +} \ No newline at end of file diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 04e986b..c293c40 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -26,6 +26,7 @@ include { BAM_CREATE_SOM_PON_GATK } from '../subworkflows/nf-core/bam_create */ include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch' +include { SAMTOOLS_VIEW } from '../modules/nf-core/samtools/view' include { MULTIQC } from '../modules/nf-core/multiqc' // Initialize file channels based on params, defined in the params.genomes[params.genome] scope @@ -85,10 +86,24 @@ workflow 
CREATEPANELREFS { if (params.tools && params.tools.split(',').contains('cnvkit')) { ch_samplesheet - .map{ meta, bam, bai, cram, crai -> [meta + [id:'panel'], bam]} + .branch { meta, bam, bai, cram, crai -> + bamfiles: bam + return [meta, bam] + cramfiles: cram + return [meta, cram, crai] + } + .set { ch_input_by_fmt } + + SAMTOOLS_VIEW (ch_input_by_fmt.cramfiles, ch_fasta, [], "").bam + .mix(ch_input_by_fmt.bamfiles) + .map { meta, bam -> + return [meta + [id:'panel'], bam] + } .groupTuple() - .map {meta, bam -> [ meta, [], bam ]} - .set { ch_cnvkit_input } + .map { meta, bam -> + return [meta, [], bam] + } + .set {ch_cnvkit_input} CNVKIT_BATCH ( ch_cnvkit_input, ch_fasta, [[:],[]], ch_cnvkit_targets, [[:],[]], true ) ch_versions = ch_versions.mix(CNVKIT_BATCH.out.versions) From 5d470e1789843861655e903f856b5d3b7d6a9d64 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 9 Apr 2025 15:07:59 +0200 Subject: [PATCH 150/234] change tests name --- tests/default.nf.test | 2 +- tests/default.nf.test.snap | 10 +++++----- tests/gens_pon.nf.test | 2 +- tests/gens_pon.nf.test.snap | 8 ++++---- tests/germlinecnvcaller_cohort.nf.test | 2 +- tests/germlinecnvcaller_cohort.nf.test.snap | 8 ++++---- tests/mutect2.nf.test | 2 +- tests/mutect2.nf.test.snap | 8 ++++---- 8 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tests/default.nf.test b/tests/default.nf.test index 261cb87..0f3df9d 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -5,7 +5,7 @@ nextflow_pipeline { tag "pipeline" tag "pipeline_createpanelrefs" - test("Run with profile test | default aka cnvkit") { + test("-profile test --tools cnvkit") { when { params { diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 6ca813f..233ce13 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,5 +1,5 @@ { - "Run with profile test | default aka cnvkit": { + "-profile test --tools cnvkit": { "content": [ 2, { @@ -39,9 +39,9 @@ ] ], "meta": { - "nf-test": 
"0.9.0", - "nextflow": "24.09.0" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-10-11T11:10:55.4912" + "timestamp": "2025-04-09T14:47:24.266861802" } -} +} \ No newline at end of file diff --git a/tests/gens_pon.nf.test b/tests/gens_pon.nf.test index 6977905..0d80ae8 100644 --- a/tests/gens_pon.nf.test +++ b/tests/gens_pon.nf.test @@ -6,7 +6,7 @@ nextflow_pipeline { tag "pipeline_createpanelrefs" config "./gens_pon.config" - test("Run gens test") { + test("-profile test --tools gens --input tests/csv/1.0.0/bam_sorted.csv --gens_bin_length 100 --gens_pon_name gens_pon --gens_readcount_format TSV") { when { params { diff --git a/tests/gens_pon.nf.test.snap b/tests/gens_pon.nf.test.snap index 0dd17fe..302a6f8 100644 --- a/tests/gens_pon.nf.test.snap +++ b/tests/gens_pon.nf.test.snap @@ -1,5 +1,5 @@ { - "Run gens test": { + "-profile test --tools gens --input tests/csv/1.0.0/bam_sorted.csv --gens_bin_length 100 --gens_pon_name gens_pon --gens_readcount_format TSV": { "content": [ 7, { @@ -53,9 +53,9 @@ ] ], "meta": { - "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-11-12T10:09:54.111724435" + "timestamp": "2025-04-09T14:48:02.583474325" } } \ No newline at end of file diff --git a/tests/germlinecnvcaller_cohort.nf.test b/tests/germlinecnvcaller_cohort.nf.test index ba9bd6d..465b550 100644 --- a/tests/germlinecnvcaller_cohort.nf.test +++ b/tests/germlinecnvcaller_cohort.nf.test @@ -6,7 +6,7 @@ nextflow_pipeline { tag "pipeline_createpanelrefs" config "./germlinecnvcaller_cohort.config" - test("Run germlinecnvcaller test") { + test("-profile test --tools germlinecnvcaller --input tests/csv/1.0.0/bam_sorted.csv --gcnv_model_name cohort --gcnv_ploidy_priors --gcnv_scatter_content 2") { when { params { diff --git a/tests/germlinecnvcaller_cohort.nf.test.snap b/tests/germlinecnvcaller_cohort.nf.test.snap index e603a3e..2a525ef 100644 --- a/tests/germlinecnvcaller_cohort.nf.test.snap +++ 
b/tests/germlinecnvcaller_cohort.nf.test.snap @@ -1,5 +1,5 @@ { - "Run germlinecnvcaller test": { + "-profile test --tools germlinecnvcaller --input tests/csv/1.0.0/bam_sorted.csv --gcnv_model_name cohort --gcnv_ploidy_priors --gcnv_scatter_content 2": { "content": [ 12, { @@ -184,9 +184,9 @@ ] ], "meta": { - "nf-test": "0.9.1", - "nextflow": "24.10.0" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-11-12T10:28:53.009826572" + "timestamp": "2025-04-09T15:03:29.439388615" } } \ No newline at end of file diff --git a/tests/mutect2.nf.test b/tests/mutect2.nf.test index 61a62f6..a346052 100644 --- a/tests/mutect2.nf.test +++ b/tests/mutect2.nf.test @@ -4,7 +4,7 @@ nextflow_pipeline { script "main.nf" tag "MUTECT2" - test("Run MUTECT2 test") { + test("-profile test --tools mutect2 --mutect2_pon_name test") { when { params { diff --git a/tests/mutect2.nf.test.snap b/tests/mutect2.nf.test.snap index dd8e352..0e85463 100644 --- a/tests/mutect2.nf.test.snap +++ b/tests/mutect2.nf.test.snap @@ -1,5 +1,5 @@ { - "Run MUTECT2 test": { + "-profile test --tools mutect2 --mutect2_pon_name test": { "content": [ 5, { @@ -52,9 +52,9 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.09.0" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2024-10-11T12:02:22.796491" + "timestamp": "2025-04-09T15:07:42.422460888" } } \ No newline at end of file From a284e748103ff5cc929f3910c77162e48e2ecb1c Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 9 Apr 2025 15:11:21 +0200 Subject: [PATCH 151/234] add auto shard --- .github/actions/get-shards/action.yml | 66 ++++++++++++ .github/actions/nf-test/action.yml | 118 ++++++++++++++++++++ .github/workflows/ci.yml | 127 ---------------------- .github/workflows/nf-test.yml | 149 ++++++++++++++++++++++++++ .nf-core.yml | 1 + nf-test.config | 11 +- 6 files changed, 341 insertions(+), 131 deletions(-) create mode 100644 .github/actions/get-shards/action.yml create mode 100644 .github/actions/nf-test/action.yml 
delete mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/nf-test.yml diff --git a/.github/actions/get-shards/action.yml b/.github/actions/get-shards/action.yml new file mode 100644 index 0000000..6d388be --- /dev/null +++ b/.github/actions/get-shards/action.yml @@ -0,0 +1,66 @@ +name: "Get number of shards" +description: "Get the number of nf-test shards for the current CI job" +inputs: + max_shards: + description: "Maximum number of shards allowed" + required: true + paths: + description: "Component paths to test" + required: false +outputs: + shard: + description: "Array of shard numbers" + value: ${{ steps.shards.outputs.shard }} + total_shards: + description: "Total number of shards" + value: ${{ steps.shards.outputs.total_shards }} +runs: + using: "composite" + steps: + - name: Install nf-test + uses: nf-core/setup-nf-test@v1 + with: + version: ${{ env.NFT_VER }} + - name: Get number of shards + id: shards + shell: bash + run: | + # Run nf-test with dynamic parameter + nftest_output=$(nf-test test \ + --dry-run \ + --profile +docker \ + --filter function,workflow,pipeline \ + --ci \ + --changed-since HEAD^) || { + echo "nf-test command failed with exit code $?" + echo "Full output: $nftest_output" + exit 1 + } + echo "nf-test dry-run output: $nftest_output" + + # Default values for shard and total_shards + shard="[]" + total_shards=0 + + # Check if there are related tests + if echo "$nftest_output" | grep -q 'No tests to execute'; then + echo "No related tests found." + else + # Extract the number of related tests + number_of_shards=$(echo "$nftest_output" | sed -n 's|.*Executed \([0-9]*\) tests.*|\1|p') + if [[ -n "$number_of_shards" && "$number_of_shards" -gt 0 ]]; then + shards_to_run=$(( $number_of_shards < ${{ inputs.max_shards }} ? $number_of_shards : ${{ inputs.max_shards }} )) + shard=$(seq 1 "$shards_to_run" | jq -R . | jq -c -s .) + total_shards="$shards_to_run" + else + echo "Unexpected output format. 
Falling back to default values." + fi + fi + + # Write to GitHub Actions outputs + echo "shard=$shard" >> $GITHUB_OUTPUT + echo "total_shards=$total_shards" >> $GITHUB_OUTPUT + + # Debugging output + echo "Final shard array: $shard" + echo "Total number of shards: $total_shards" diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml new file mode 100644 index 0000000..e4fd9c3 --- /dev/null +++ b/.github/actions/nf-test/action.yml @@ -0,0 +1,118 @@ +name: "nf-test Action" +description: "Runs nf-test with common setup steps" +inputs: + profile: + description: "Profile to use" + required: true + shard: + description: "Shard number for this CI job" + required: true + total_shards: + description: "Total number of test shards(NOT the total number of matrix jobs)" + required: true + paths: + description: "Test paths" + required: true + +runs: + using: "composite" + steps: + - name: Setup Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ env.NXF_VERSION }}" + + - name: Set up Python + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5 + with: + python-version: "3.11" + + - name: Install nf-test + uses: nf-core/setup-nf-test@v1 + with: + version: "${{ env.NFT_VER }}" + + - name: Setup apptainer + if: contains(inputs.profile, 'singularity') + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + if: contains(inputs.profile, 'singularity') + shell: bash + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Conda setup + if: ${{inputs.profile == 'conda'}} + uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3 + with: + auto-update-conda: true + conda-solver: libmamba + conda-remove-defaults: true + + - name: Install pdiff + shell: bash + run: | + python -m pip install pdiff + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + # TODO Skip failing conda 
tests and document their failures + # https://github.com/nf-core/modules/issues/7017 + - name: Run nf-test + shell: bash + env: + NFT_DIFF: ${{ env.NFT_DIFF }} + NFT_DIFF_ARGS: ${{ env.NFT_DIFF_ARGS }} + NFT_WORKDIR: ${{ env.NFT_WORKDIR }} + run: | + nf-test test \ + --profile=+${{ inputs.profile }} \ + --tap=test.tap \ + --verbose \ + --ci \ + --changed-since HEAD^ \ + --shard ${{ inputs.shard }}/${{ inputs.total_shards }} \ + --filter function,workflow,pipeline + + # Save the absolute path of the test.tap file to the output + echo "tap_file_path=$(realpath test.tap)" >> $GITHUB_OUTPUT + + - name: Generate test summary + if: always() + shell: bash + run: | + # Add header if it doesn't exist (using a token file to track this) + if [ ! -f ".summary_header" ]; then + echo "# 🚀 nf-test results" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Status | Test Name | Profile | Shard |" >> $GITHUB_STEP_SUMMARY + echo "|:------:|-----------|---------|-------|" >> $GITHUB_STEP_SUMMARY + touch .summary_header + fi + + if [ -f test.tap ]; then + while IFS= read -r line; do + if [[ $line =~ ^ok ]]; then + test_name="${line#ok }" + # Remove the test number from the beginning + test_name="${test_name#* }" + echo "| ✅ | ${test_name} | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + elif [[ $line =~ ^not\ ok ]]; then + test_name="${line#not ok }" + # Remove the test number from the beginning + test_name="${test_name#* }" + echo "| ❌ | ${test_name} | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + fi + done < test.tap + else + echo "| ⚠️ | No test results found | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + fi + + - name: Clean up + if: always() + shell: bash + run: | + sudo rm -rf /home/ubuntu/tests/ diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 
6cd96e2..0000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,127 +0,0 @@ -name: nf-core CI -# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors -on: - push: - branches: - - dev - pull_request: - release: - types: [published] - workflow_dispatch: - -env: - NFT_DIFF: "pdiff" - NFT_DIFF_ARGS: "--line-numbers --width 120 --expand-tabs=2" - NFT_VER: "0.9.0" - NFT_WORKDIR: "~" - NXF_ANSI_LOG: false - NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity - NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity - -concurrency: - group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" - cancel-in-progress: true - -jobs: - test: - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/createpanelrefs') }}" - runs-on: ubuntu-latest - name: "Test ${{ matrix.filter }} | ${{ matrix.profile }} | ${{ matrix.NXF_VER }} | ${{ matrix.shard }}/5" - strategy: - fail-fast: false - matrix: - NXF_VER: - - "24.04.2" - - "latest-everything" - filter: ["workflow", "function", "pipeline"] - # filter: ["process", "workflow", "function", "pipeline"] - profile: ["conda", "docker", "singularity"] - shard: [1, 2, 3, 4] - # Exclude conda and singularity on dev - isMaster: - - ${{ github.base_ref == 'master' }} - exclude: - - isMaster: false - profile: "conda" - - isMaster: false - profile: "singularity" - steps: - - name: Check out pipeline code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - with: - fetch-depth: 0 - - - name: Set up Nextflow - uses: nf-core/setup-nextflow@v2 - with: - version: "${{ matrix.NXF_VER }}" - - - name: Set up nf-test - uses: nf-core/setup-nf-test@v1 - with: - version: ${{ env.NFT_VER }} - - - name: Cache pdiff - uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4 - id: cache-pip-pdiff - with: - path: 
~/.cache/pip - key: ${{ runner.os }}-pip-pdiff - - - name: Set up pdiff to see diff between nf-test snapshots - run: | - python -m pip install --upgrade pip - pip install pdiff cryptography - - - name: Set up Apptainer - if: matrix.profile == 'singularity' - uses: eWaterCycle/setup-apptainer@main - - - name: Set up Singularity - if: matrix.profile == 'singularity' - run: | - mkdir -p $NXF_SINGULARITY_CACHEDIR - mkdir -p $NXF_SINGULARITY_LIBRARYDIR - - - name: Set up Miniconda - if: matrix.profile == 'conda' - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 - with: - miniconda-version: "latest" - auto-update-conda: true - conda-solver: libmamba - channels: conda-forge,bioconda - - - name: Set up Conda - if: matrix.profile == 'conda' - run: | - echo $(realpath $CONDA)/condabin >> $GITHUB_PATH - echo $(realpath python) >> $GITHUB_PATH - - - name: Clean up Disk space - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - - name: "Run tests | ${{ matrix.filter }}_${{ matrix.profile }} | ${{ matrix.shard }}/4" - run: | - nf-test test \ - --ci \ - --debug \ - --verbose \ - --junitxml="TEST-${{ matrix.filter }}_${{ matrix.profile }}_${{ matrix.shard }}.xml" \ - --shard ${{ matrix.shard }}/5 \ - --changed-since HEAD^ \ - --follow-dependencies \ - --profile "+${{ matrix.profile }}" \ - --filter ${{ matrix.filter }} - - - name: Publish Test Report - uses: mikepenz/action-junit-report@v4 - if: success() || failure() # always run even if the previous step fails - with: - report_paths: "TEST-*.xml" - - - name: Clean up - if: always() - run: | - sudo rm -rf /home/ubuntu/tests/ diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml new file mode 100644 index 0000000..c951f6d --- /dev/null +++ b/.github/workflows/nf-test.yml @@ -0,0 +1,149 @@ +name: Run nf-test +on: + push: + paths-ignore: + - "docs/**" + - "**/meta.yml" + - "**/*.md" + - "**/*.png" + - "**/*.svg" + pull_request: + 
paths-ignore: + - "docs/**" + - "**/meta.yml" + - "**/*.md" + - "**/*.png" + - "**/*.svg" + release: + types: [published] + workflow_dispatch: + inputs: + runners: + description: "Runners to test on" + type: choice + options: + - "ubuntu-latest" + - "self-hosted" + default: "ubuntu-latest" + +# Cancel if a newer run is started +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NFT_DIFF: "pdiff" + NFT_DIFF_ARGS: "--line-numbers --width 120 --expand-tabs=2" + # renovate: datasource=github-releases depName=askimed/nf-test versioning=semver + NFT_VER: "0.9.2" + NFT_WORKDIR: "~" + NXF_ANSI_LOG: false + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity + +jobs: + nf-test-changes: + name: nf-test-changes + runs-on: ${{ github.event.inputs.runners || github.run_number > 1 && 'ubuntu-latest' || 'self-hosted' }} + outputs: + shard: ${{ steps.set-shards.outputs.shard }} + total_shards: ${{ steps.set-shards.outputs.total_shards }} + steps: + - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner + run: | + ls -la ./ + rm -rf ./* || true + rm -rf ./.??* || true + ls -la ./ + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + with: + fetch-depth: 0 + + - name: get number of shards + id: set-shards + uses: ./.github/actions/get-shards + env: + NFT_VER: ${{ env.NFT_VER }} + with: + max_shards: 7 + + - name: debug + run: | + echo ${{ steps.set-shards.outputs.shard }} + echo ${{ steps.set-shards.outputs.total_shards }} + + nf-test: + runs-on: ${{ github.event.inputs.runners || github.run_number > 1 && 'ubuntu-latest' || 'self-hosted' }} + name: "${{ matrix.profile }} | ${{ matrix.NXF_VER }} | ${{ matrix.shard }}/${{ needs.nf-test-changes.outputs.total_shards }}" + needs: [nf-test-changes] + if: ${{ 
needs.nf-test-changes.outputs.total_shards != '0' }} + strategy: + fail-fast: false + matrix: + shard: ${{ fromJson(needs.nf-test-changes.outputs.shard) }} + profile: [conda, docker, singularity] + isMaster: + - ${{ github.base_ref == 'master' }} + # Exclude conda and singularity on dev + exclude: + - isMaster: false + profile: "conda" + - isMaster: false + profile: "singularity" + NXF_VER: + # renovate: datasource=github-releases depName=nextflow/nextflow versioning=semver + - "24.10.2" + - "latest-everything" + env: + NXF_ANSI_LOG: false + TOTAL_SHARDS: ${{ needs.nf-test-changes.outputs.total_shards }} + + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + with: + fetch-depth: 0 + + - name: Run nf-test + uses: ./.github/actions/nf-test + env: + NFT_DIFF: ${{ env.NFT_DIFF }} + NFT_DIFF_ARGS: ${{ env.NFT_DIFF_ARGS }} + NFT_WORKDIR: ${{ env.NFT_WORKDIR }} + with: + profile: ${{ matrix.profile }} + shard: ${{ matrix.shard }} + total_shards: ${{ env.TOTAL_SHARDS }} + + confirm-pass: + runs-on: ${{ github.event.inputs.runners || github.run_number > 1 && 'ubuntu-latest' || 'self-hosted' }} + needs: [nf-test] + if: always() + steps: + - name: One or more tests failed + if: ${{ contains(needs.*.result, 'failure') }} + run: exit 1 + + - name: One or more tests cancelled + if: ${{ contains(needs.*.result, 'cancelled') }} + run: exit 1 + + - name: All tests ok + if: ${{ contains(needs.*.result, 'success') }} + run: exit 0 + + - name: debug-print + if: always() + run: | + echo "::group::DEBUG: `needs` Contents" + echo "DEBUG: toJSON(needs) = ${{ toJSON(needs) }}" + echo "DEBUG: toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" + echo "::endgroup::" + + - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner + if: always() + run: | + ls -la ./ + rm -rf ./* || true + rm -rf ./.??* || true + ls -la ./ diff --git a/.nf-core.yml b/.nf-core.yml index eabbf11..2cf4749 100644 --- a/.nf-core.yml +++ b/.nf-core.yml 
@@ -1,6 +1,7 @@ lint: actions_ci: false files_exist: + - .github/workflows/ci.yml - conf/modules.config files_unchanged: - .gitattributes diff --git a/nf-test.config b/nf-test.config index d5889c0..4ba00ef 100644 --- a/nf-test.config +++ b/nf-test.config @@ -1,12 +1,12 @@ config { - // location for all nf-tests + // location for all nf-test tests testsDir "." // nf-test directory including temporary files for each test - workDir ".nf-test" + workDir System.getenv("NFT_WORKDIR") ?: ".nf-test" // location of an optional nextflow.config file specific for executing tests - configFile "conf/test.config" + configFile "tests/nextflow.config" // ignore tests coming from the nf-core/modules repo ignore 'modules/nf-core/**/*', 'subworkflows/nf-core/**/*' @@ -14,7 +14,10 @@ config { // run all test with defined profile(s) from the main nextflow.config profile "test" - // Include plugins + // list of filenames or patterns that should be trigger a full test run + triggers 'nextflow.config', 'nf-test.config', 'conf/test.config', 'tests/nextflow.config' + + // load the necessary plugins plugins { load "nft-bam@0.4.0" load "nft-utils@0.0.3" From dbd164939c377e83d774a422dcd9c3d4959f7e68 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 9 Apr 2025 16:04:22 +0200 Subject: [PATCH 152/234] fix tests --- tests/default.nf.test | 2 +- tests/default.nf.test.snap | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/default.nf.test b/tests/default.nf.test index 261cb87..0f3df9d 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -5,7 +5,7 @@ nextflow_pipeline { tag "pipeline" tag "pipeline_createpanelrefs" - test("Run with profile test | default aka cnvkit") { + test("-profile test --tools cnvkit") { when { params { diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index da7cb82..4aa6000 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,5 +1,5 @@ { - "Run with profile test | default aka cnvkit": { + 
"-profile test --tools cnvkit": { "content": [ 4, { @@ -55,6 +55,6 @@ "nf-test": "0.9.2", "nextflow": "24.10.5" }, - "timestamp": "2025-04-08T16:03:23.731350516" + "timestamp": "2025-04-09T16:04:00.7563668" } } \ No newline at end of file From bc99a1de6062dc9f69aaa81535a0375822bf9e13 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 9 Apr 2025 16:58:41 +0200 Subject: [PATCH 153/234] fix tests --- conf/test.config | 2 +- main.nf | 7 +++++-- tests/default.nf.test | 3 ++- tests/default.nf.test.snap | 4 ++-- workflows/createpanelrefs.nf | 10 ++++++---- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/conf/test.config b/conf/test.config index fab55ad..0d3f9fa 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,7 +23,7 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - input = "${projectDir}/tests/csv/1.0.0/bam_cram.csv" + input = "${projectDir}/tests/csv/1.0.0/bam.csv" // Main options tools = 'cnvkit' diff --git a/main.nf b/main.nf index 950cc18..6468136 100644 --- a/main.nf +++ b/main.nf @@ -51,6 +51,7 @@ workflow NFCORE_CREATEPANELREFS { take: samplesheet // channel: samplesheet read in from --input + tools main: @@ -58,7 +59,8 @@ workflow NFCORE_CREATEPANELREFS { // WORKFLOW: Run pipeline // CREATEPANELREFS ( - samplesheet + samplesheet, + tools ) emit: multiqc_report = CREATEPANELREFS.out.multiqc_report // channel: /path/to/multiqc_report.html @@ -88,7 +90,8 @@ workflow { // WORKFLOW: Run main workflow // NFCORE_CREATEPANELREFS ( - PIPELINE_INITIALISATION.out.samplesheet + PIPELINE_INITIALISATION.out.samplesheet, + params.tools ) // // SUBWORKFLOW: Run completion tasks diff --git a/tests/default.nf.test b/tests/default.nf.test index 0f3df9d..f871e5a 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -5,11 +5,12 @@ nextflow_pipeline { tag "pipeline" tag "pipeline_createpanelrefs" - test("-profile test --tools cnvkit") { + test("-profile test --tools cnvkit --input 
tests/csv/1.0.0/bam_cram.csv") { when { params { outdir = "$outputDir" + input = "${projectDir}/tests/csv/1.0.0/bam_cram.csv" } } diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 4aa6000..19dbe2a 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,5 +1,5 @@ { - "-profile test --tools cnvkit": { + "-profile test --tools cnvkit --input tests/csv/1.0.0/bam_cram.csv": { "content": [ 4, { @@ -55,6 +55,6 @@ "nf-test": "0.9.2", "nextflow": "24.10.5" }, - "timestamp": "2025-04-09T16:04:00.7563668" + "timestamp": "2025-04-09T16:56:32.61350234" } } \ No newline at end of file diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index c293c40..3c70a15 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -79,11 +79,13 @@ workflow CREATEPANELREFS { take: ch_samplesheet // channel: samplesheet read in from --input + tools + main: ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - if (params.tools && params.tools.split(',').contains('cnvkit')) { + if (tools && tools.split(',').contains('cnvkit')) { ch_samplesheet .branch { meta, bam, bai, cram, crai -> @@ -109,7 +111,7 @@ workflow CREATEPANELREFS { ch_versions = ch_versions.mix(CNVKIT_BATCH.out.versions) } - if (params.tools && params.tools.split(',').contains('germlinecnvcaller')) { + if (tools && tools.split(',').contains('germlinecnvcaller')) { ch_samplesheet .map{meta, bam, bai, cram, crai -> @@ -134,7 +136,7 @@ workflow CREATEPANELREFS { ch_versions = ch_versions.mix(GERMLINECNVCALLER_COHORT.out.versions) } - if (params.tools && params.tools.split(',').contains('mutect2')) { + if (tools && tools.split(',').contains('mutect2')) { ch_mutect2_input = ch_samplesheet.map{meta, bam, bai, cram, crai -> if (bam) return [ meta + [data_type:'bam'], bam, bai, [] ] @@ -152,7 +154,7 @@ workflow CREATEPANELREFS { } - if (params.tools && params.tools.split(',').contains('gens')) { + if (tools && 
tools.split(',').contains('gens')) { ch_samplesheet .map{meta, bam, bai, cram, crai -> From d012cf506d6f2f0763ec3493486035c6de547ea0 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 9 Apr 2025 17:26:35 +0200 Subject: [PATCH 154/234] language server --- main.nf | 79 ++++++++++++++++++++------------------------------------- 1 file changed, 28 insertions(+), 51 deletions(-) diff --git a/main.nf b/main.nf index 950cc18..99cc16e 100644 --- a/main.nf +++ b/main.nf @@ -15,8 +15,6 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// This is an example of how to use getGenomeAttribute() to fetch parameters -// from igenomes.config using `--genome` params.fasta = getGenomeAttribute('fasta') params.fai = getGenomeAttribute('fai') params.dict = getGenomeAttribute('dict') @@ -38,31 +36,6 @@ include { CREATEPANELREFS } from './workflows/createpanelrefs' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline' -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOWS FOR PIPELINE -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// WORKFLOW: Run main analysis pipeline depending on type of input -// -workflow NFCORE_CREATEPANELREFS { - - take: - samplesheet // channel: samplesheet read in from --input - - main: - - // - // WORKFLOW: Run pipeline - // - CREATEPANELREFS ( - samplesheet - ) - emit: - multiqc_report = CREATEPANELREFS.out.multiqc_report // channel: /path/to/multiqc_report.html -} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -70,61 +43,65 @@ workflow NFCORE_CREATEPANELREFS { */ workflow { - - main: - // // SUBWORKFLOW: Run initialisation tasks - // - PIPELINE_INITIALISATION ( + PIPELINE_INITIALISATION( 
params.version, params.validate_params, params.monochrome_logs, args, params.outdir, - params.input + params.input, ) - // // WORKFLOW: Run main workflow - // - NFCORE_CREATEPANELREFS ( + NFCORE_CREATEPANELREFS( PIPELINE_INITIALISATION.out.samplesheet ) - // + // SUBWORKFLOW: Run completion tasks - // - PIPELINE_COMPLETION ( + PIPELINE_COMPLETION( params.email, params.email_on_fail, params.plaintext_email, params.outdir, params.monochrome_logs, params.hook_url, - NFCORE_CREATEPANELREFS.out.multiqc_report + NFCORE_CREATEPANELREFS.out.multiqc_report, ) } +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOWS FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// WORKFLOW: Run main analysis pipeline depending on type of input +workflow NFCORE_CREATEPANELREFS { + take: + samplesheet // channel: samplesheet read in from --input + + main: + + // WORKFLOW: Run pipeline + CREATEPANELREFS(samplesheet) + + emit: + multiqc_report = CREATEPANELREFS.out.multiqc_report // channel: /path/to/multiqc_report.html +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEFINE FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// // Get attribute from genome config file e.g. 
fasta -// - def getGenomeAttribute(attribute) { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] + if (params.genomes[params.genome].containsKey(attribute)) { + return params.genomes[params.genome][attribute] } } return null } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ From ae6558f8895dee62ce18ea29e7e5db3618b5c6ce Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 9 Apr 2025 17:33:42 +0200 Subject: [PATCH 155/234] fix GHA --- .github/actions/get-shards/action.yml | 7 +++++-- .github/actions/nf-test/action.yml | 14 ++++++++------ .github/workflows/nf-test.yml | 11 ++++------- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/.github/actions/get-shards/action.yml b/.github/actions/get-shards/action.yml index 6d388be..3408527 100644 --- a/.github/actions/get-shards/action.yml +++ b/.github/actions/get-shards/action.yml @@ -7,6 +7,9 @@ inputs: paths: description: "Component paths to test" required: false + tags: + description: "Tags to pass as argument for nf-test --tag parameter" + required: false outputs: shard: description: "Array of shard numbers" @@ -27,9 +30,9 @@ runs: run: | # Run nf-test with dynamic parameter nftest_output=$(nf-test test \ - --dry-run \ --profile +docker \ - --filter function,workflow,pipeline \ + $(if [ -n "${{ inputs.tags }}" ]; then echo "--tag ${{ inputs.tags }}"; fi) \ + --dry-run \ --ci \ --changed-since HEAD^) || { echo "nf-test command failed with exit code $?" 
diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml index e4fd9c3..ab46869 100644 --- a/.github/actions/nf-test/action.yml +++ b/.github/actions/nf-test/action.yml @@ -13,7 +13,9 @@ inputs: paths: description: "Test paths" required: true - + tags: + description: "Tags to pass as argument for nf-test --tag parameter" + required: false runs: using: "composite" steps: @@ -44,7 +46,7 @@ runs: mkdir -p $NXF_SINGULARITY_LIBRARYDIR - name: Conda setup - if: ${{inputs.profile == 'conda'}} + if: contains(inputs.profile, 'conda') uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3 with: auto-update-conda: true @@ -70,12 +72,12 @@ runs: run: | nf-test test \ --profile=+${{ inputs.profile }} \ - --tap=test.tap \ - --verbose \ + $(if [ -n "${{ inputs.tags }}" ]; then echo "--tag ${{ inputs.tags }}"; fi) \ --ci \ --changed-since HEAD^ \ - --shard ${{ inputs.shard }}/${{ inputs.total_shards }} \ - --filter function,workflow,pipeline + --verbose \ + --tap=test.tap \ + --shard ${{ inputs.shard }}/${{ inputs.total_shards }} # Save the absolute path of the test.tap file to the output echo "tap_file_path=$(realpath test.tap)" >> $GITHUB_OUTPUT diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index c951f6d..4715b74 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -35,7 +35,6 @@ env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} NFT_DIFF: "pdiff" NFT_DIFF_ARGS: "--line-numbers --width 120 --expand-tabs=2" - # renovate: datasource=github-releases depName=askimed/nf-test versioning=semver NFT_VER: "0.9.2" NFT_WORKDIR: "~" NXF_ANSI_LOG: false @@ -83,16 +82,15 @@ jobs: matrix: shard: ${{ fromJson(needs.nf-test-changes.outputs.shard) }} profile: [conda, docker, singularity] - isMaster: - - ${{ github.base_ref == 'master' }} + isMain: + - ${{ github.base_ref == 'master' || github.base_ref == 'main' }} # Exclude conda and singularity on dev exclude: - - isMaster: false + - 
isMain: false profile: "conda" - - isMaster: false + - isMain: false profile: "singularity" NXF_VER: - # renovate: datasource=github-releases depName=nextflow/nextflow versioning=semver - "24.10.2" - "latest-everything" env: @@ -114,7 +112,6 @@ jobs: profile: ${{ matrix.profile }} shard: ${{ matrix.shard }} total_shards: ${{ env.TOTAL_SHARDS }} - confirm-pass: runs-on: ${{ github.event.inputs.runners || github.run_number > 1 && 'ubuntu-latest' || 'self-hosted' }} needs: [nf-test] From d83011544dbddf5a25baf01b5b4eb1a4a5b9d28d Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 9 Apr 2025 17:37:37 +0200 Subject: [PATCH 156/234] main: --- main.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/main.nf b/main.nf index 0a80bb9..56c78c6 100644 --- a/main.nf +++ b/main.nf @@ -43,6 +43,8 @@ include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_crea */ workflow { + main: + // SUBWORKFLOW: Run initialisation tasks PIPELINE_INITIALISATION( params.version, From c3307fc204282c8f1afc8cbf3d5482759f7e5a44 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 9 Apr 2025 17:49:14 +0200 Subject: [PATCH 157/234] fix parenthesis --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 56c78c6..a3b4701 100644 --- a/main.nf +++ b/main.nf @@ -56,8 +56,9 @@ workflow { ) // WORKFLOW: Run main workflow + NFCORE_CREATEPANELREFS( PIPELINE_INITIALISATION.out.samplesheet, - params.tools + params.tools, ) // SUBWORKFLOW: Run completion tasks @@ -85,7 +86,6 @@ workflow NFCORE_CREATEPANELREFS { tools main: - // WORKFLOW: Run pipeline CREATEPANELREFS(samplesheet, tools) From 851335641de85b0e9a8a74f706b6cad4afea4598 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 9 Apr 2025 17:52:11 +0200 Subject: [PATCH 158/234] fix linting --- nextflow.config | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index 5c215f5..d607448 100644 --- a/nextflow.config +++ b/nextflow.config @@ -191,8 
+191,7 @@ profiles { includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" // Load nf-core/createpanelrefs custom profiles from different institutions. -// TODO nf-core: Optionally, you can add a pipeline-specific nf-core config at https://github.com/nf-core/configs -// includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/createpanelrefs.config" : "/dev/null" +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/createpanelrefs.config" : "/dev/null" // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled From 9a914c53be144498a8b097403cbd7bb8dc3bb475 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 12:21:15 +0200 Subject: [PATCH 159/234] forgot file --- tests/nextflow.config | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 tests/nextflow.config diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 0000000..393cbed --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,15 @@ +/* +======================================================================================== + Nextflow config file for running nf-test tests +======================================================================================== +*/ + +// Specify any additional parameters here +// Or any resources requirements + +// Should resolve issue with accessing s3 from the runners +aws.client.anonymous = true + +// Should take care of all basepath +params.modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' +params.igenomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data' From 
8b3688c9df057cdc9098801b5762a9c790e6c2f8 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 12:23:06 +0200 Subject: [PATCH 160/234] early fail --- tests/default.nf.test | 2 +- tests/gens_pon.nf.test | 2 +- tests/germlinecnvcaller_cohort.nf.test | 2 +- tests/mutect2.nf.test | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/default.nf.test b/tests/default.nf.test index f871e5a..d3497a5 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -19,8 +19,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assert workflow.success assertAll( - { assert workflow.success}, { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), diff --git a/tests/gens_pon.nf.test b/tests/gens_pon.nf.test index 0d80ae8..dd14828 100644 --- a/tests/gens_pon.nf.test +++ b/tests/gens_pon.nf.test @@ -27,8 +27,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assert workflow.success assertAll( - { assert workflow.success}, { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), diff --git a/tests/germlinecnvcaller_cohort.nf.test b/tests/germlinecnvcaller_cohort.nf.test index 465b550..ed01f61 100644 --- a/tests/germlinecnvcaller_cohort.nf.test +++ b/tests/germlinecnvcaller_cohort.nf.test @@ -27,8 +27,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) // stable_path: All files in 
${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assert workflow.success assertAll( - { assert workflow.success}, { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), diff --git a/tests/mutect2.nf.test b/tests/mutect2.nf.test index a346052..a8e836b 100644 --- a/tests/mutect2.nf.test +++ b/tests/mutect2.nf.test @@ -19,8 +19,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'gatk4/test/**/*']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assert workflow.success assertAll( - { assert workflow.success}, { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), From ab071b82742c0c4099e20e42cc02f1cc137c5821 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 12:47:57 +0200 Subject: [PATCH 161/234] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f819c1e..57de082 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#31](https://github.com/nf-core/createpanelrefs/pull/31) - Publish interval_list file from gens subworkflow by default. 
- [#35](https://github.com/nf-core/createpanelrefs/pull/35) - Template update for nf-core/tools v3.0.2 - [#35](https://github.com/nf-core/createpanelrefs/pull/35) - Improve pipeline level tests +- [#48](https://github.com/nf-core/createpanelrefs/pull/48) - Improve CI (automatic nf-test shards) ### `Fixed` From fb7a0be9b9fe3958e11dd7fbd2d7427cf5c24312 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 16:23:46 +0200 Subject: [PATCH 162/234] fix test --- .github/workflows/nf-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index 4715b74..1d63648 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -112,7 +112,7 @@ jobs: profile: ${{ matrix.profile }} shard: ${{ matrix.shard }} total_shards: ${{ env.TOTAL_SHARDS }} - confirm-pass: + confirm-pass-nf-test: runs-on: ${{ github.event.inputs.runners || github.run_number > 1 && 'ubuntu-latest' || 'self-hosted' }} needs: [nf-test] if: always() From e5c8b859792cb37a27eb71431ea80b93a67142cf Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 16:26:32 +0200 Subject: [PATCH 163/234] runsOn --- .github/workflows/nf-test.yml | 5 ++++- CHANGELOG.md | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index 1d63648..07e741e 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -113,7 +113,10 @@ jobs: shard: ${{ matrix.shard }} total_shards: ${{ env.TOTAL_SHARDS }} confirm-pass-nf-test: - runs-on: ${{ github.event.inputs.runners || github.run_number > 1 && 'ubuntu-latest' || 'self-hosted' }} + runs-on: + - runs-on=${{ github.run_id }} + - runner=4cpu-linux-x64 + - image=ubuntu22-full-x64 needs: [nf-test] if: always() steps: diff --git a/CHANGELOG.md b/CHANGELOG.md index 57de082..7679a90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,7 +25,7 @@ Initial release of 
nf-core/createpanelrefs, created with the [nf-core](https://n - [#31](https://github.com/nf-core/createpanelrefs/pull/31) - Publish interval_list file from gens subworkflow by default. - [#35](https://github.com/nf-core/createpanelrefs/pull/35) - Template update for nf-core/tools v3.0.2 - [#35](https://github.com/nf-core/createpanelrefs/pull/35) - Improve pipeline level tests -- [#48](https://github.com/nf-core/createpanelrefs/pull/48) - Improve CI (automatic nf-test shards) +- [#48](https://github.com/nf-core/createpanelrefs/pull/48) - Improve CI (early failure + automatic nf-test shards + [RunsOn](https://runs-on.com/)) ### `Fixed` From 6a53ad59663971937cf156d00f5214af28694d23 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 17:48:49 +0200 Subject: [PATCH 164/234] add mutect2 cram tests --- tests/csv/1.0.0/cram.csv | 6 +-- tests/default.nf.test | 2 +- tests/gens_pon.nf.test | 2 +- tests/germlinecnvcaller_cohort.nf.test | 2 +- tests/mutect2.nf.test | 36 ++++++++++++++- tests/mutect2.nf.test.snap | 62 +++++++++++++++++++++++++- 6 files changed, 100 insertions(+), 10 deletions(-) diff --git a/tests/csv/1.0.0/cram.csv b/tests/csv/1.0.0/cram.csv index 67a3fd1..9aaa6b0 100644 --- a/tests/csv/1.0.0/cram.csv +++ b/tests/csv/1.0.0/cram.csv @@ -1,3 +1,3 @@ -sample,cram -sample3,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram -sample4,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram +sample,cram,crai +sample3,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram.crai 
+sample4,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/cram/test2.paired_end.markduplicates.sorted.cram.crai diff --git a/tests/default.nf.test b/tests/default.nf.test index f871e5a..d3497a5 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -19,8 +19,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assert workflow.success assertAll( - { assert workflow.success}, { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), diff --git a/tests/gens_pon.nf.test b/tests/gens_pon.nf.test index 6977905..ba9d421 100644 --- a/tests/gens_pon.nf.test +++ b/tests/gens_pon.nf.test @@ -27,8 +27,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assert workflow.success assertAll( - { assert workflow.success}, { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), diff --git a/tests/germlinecnvcaller_cohort.nf.test b/tests/germlinecnvcaller_cohort.nf.test index ba9bd6d..ff5046a 100644 --- a/tests/germlinecnvcaller_cohort.nf.test +++ b/tests/germlinecnvcaller_cohort.nf.test @@ -27,8 +27,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) // stable_path: All files in ${params.outdir}/ with stable content 
def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assert workflow.success assertAll( - { assert workflow.success}, { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), diff --git a/tests/mutect2.nf.test b/tests/mutect2.nf.test index 61a62f6..42d1603 100644 --- a/tests/mutect2.nf.test +++ b/tests/mutect2.nf.test @@ -4,7 +4,7 @@ nextflow_pipeline { script "main.nf" tag "MUTECT2" - test("Run MUTECT2 test") { + test("-profile test --tools mutect2") { when { params { @@ -19,8 +19,40 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'gatk4/test/**/*']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assert workflow.success + assertAll( + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } + + test("-profile test --tools mutect2 --input tests/csv/1.0.0/cram.csv") { + + when { + params { + input = "${projectDir}/tests/csv/1.0.0/cram.csv" + outdir = "$outputDir" + tools = 'mutect2' + mutect2_pon_name = 'test' + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'gatk4/test/**/*']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 
'tests/.nftignore') + assert workflow.success assertAll( - { assert workflow.success}, { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), diff --git a/tests/mutect2.nf.test.snap b/tests/mutect2.nf.test.snap index dd8e352..2abf197 100644 --- a/tests/mutect2.nf.test.snap +++ b/tests/mutect2.nf.test.snap @@ -1,5 +1,63 @@ { - "Run MUTECT2 test": { + "-profile test --tools mutect2 --input tests/csv/1.0.0/cram.csv": { + "content": [ + 5, + { + "GATK4_CREATESOMATICPANELOFNORMALS": { + "gatk4": "4.5.0.0" + }, + "GATK4_GENOMICSDBIMPORT": { + "gatk4": "4.5.0.0" + }, + "GATK4_MUTECT2": { + "gatk4": "4.5.0.0" + }, + "Workflow": { + "nf-core/createpanelrefs": "v1.0dev" + } + }, + [ + "gatk4", + "gatk4/sample3.vcf.gz", + "gatk4/sample3.vcf.gz.stats", + "gatk4/sample3.vcf.gz.tbi", + "gatk4/sample4.vcf.gz", + "gatk4/sample4.vcf.gz.stats", + "gatk4/sample4.vcf.gz.tbi", + "gatk4/test", + "gatk4/test.vcf.gz", + "gatk4/test.vcf.gz.tbi", + "gatk4/test/__tiledb_workspace.tdb", + "gatk4/test/callset.json", + "gatk4/test/chr21$2$23354000", + "gatk4/test/chr21$24132500$24910998", + "gatk4/test/chr21$25689498$46709983", + "gatk4/test/vcfheader.vcf", + "gatk4/test/vidmap.json", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml" + ], + [ + "sample3.vcf.gz.stats:md5,a05ace4138fc5cb993ed912d654ec22d", + "sample4.vcf.gz.stats:md5,080e6d0e254e582dfb9d5916c9637391", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-11T17:42:06.529424623" + }, + "-profile test --tools mutect2": { "content": [ 5, { @@ -57,4 +115,4 @@ }, 
"timestamp": "2024-10-11T12:02:22.796491" } -} \ No newline at end of file +} From 782aa8e895b476edf44aaed9b9177e650267652a Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 17:54:20 +0200 Subject: [PATCH 165/234] improve referecences for tests --- conf/igenomes.config | 6 ++++++ conf/test.config | 8 ++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/conf/igenomes.config b/conf/igenomes.config index 73e84d9..75de6a5 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -467,5 +467,11 @@ params { readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" } + 'GRCh38.chr21.testdata' { + fasta = "${params.igenomes_base}/homo_sapiens/genome/chr21/sequence/genome.fasta" + dict = "${params.igenomes_base}/homo_sapiens/genome/chr21/sequence/genome.dict" + fai = "${params.igenomes_base}/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" + mutect2_target_bed = "${params.igenomes_base}/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + } } } diff --git a/conf/test.config b/conf/test.config index 0d3f9fa..9aa01d3 100644 --- a/conf/test.config +++ b/conf/test.config @@ -33,10 +33,6 @@ params { gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" // Small reference genome - genome = null - igenomes_ignore = true - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" - dict = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.dict" - fai = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" - mutect2_target_bed = 
"https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + genome = 'GRCh38.chr21.testdata' + igenomes_base = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics" } From 2759d8d4042d1a193b466b79f1a92ef77f819109 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 18:02:50 +0200 Subject: [PATCH 166/234] improve test usage --- tests/gens_pon.nf.test | 4 ++-- tests/germlinecnvcaller_cohort.nf.test | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/gens_pon.nf.test b/tests/gens_pon.nf.test index dd14828..82837c8 100644 --- a/tests/gens_pon.nf.test +++ b/tests/gens_pon.nf.test @@ -10,8 +10,8 @@ nextflow_pipeline { when { params { - dict = null - fai = null + genome = 'null' + igenomes_ignore = true fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" gens_bin_length = 100 gens_pon_name = 'gens_pon' diff --git a/tests/germlinecnvcaller_cohort.nf.test b/tests/germlinecnvcaller_cohort.nf.test index ed01f61..ede18ee 100644 --- a/tests/germlinecnvcaller_cohort.nf.test +++ b/tests/germlinecnvcaller_cohort.nf.test @@ -10,8 +10,8 @@ nextflow_pipeline { when { params { - dict = null - fai = null + genome = 'null' + igenomes_ignore = true fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" gcnv_model_name = 'cohort' gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" From 4c4c5ee2597a5b50771713763c3a755b830a8dd6 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 18:16:42 +0200 Subject: [PATCH 167/234] sort params --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index a3b4701..72f70d0 100644 --- a/main.nf +++ b/main.nf @@ -16,15 +16,15 @@ 
*/ params.fasta = getGenomeAttribute('fasta') -params.fai = getGenomeAttribute('fai') params.dict = getGenomeAttribute('dict') +params.fai = getGenomeAttribute('fai') params.gcnv_exclude_bed = getGenomeAttribute('gcnv_exclude_bed') params.gcnv_exclude_interval_list = getGenomeAttribute('gcnv_exclude_interval_list') params.gcnv_mappable_regions = getGenomeAttribute('gcnv_mappable_regions') +params.gcnv_ploidy_priors = getGenomeAttribute('gcnv_ploidy_priors') params.gcnv_segmental_duplications = getGenomeAttribute('gcnv_segmental_duplications') params.gcnv_target_bed = getGenomeAttribute('gcnv_target_bed') params.gcnv_target_interval_list = getGenomeAttribute('gcnv_target_interval_list') -params.gcnv_ploidy_priors = getGenomeAttribute('gcnv_ploidy_priors') params.mutect2_target_bed = getGenomeAttribute('mutect2_target_bed') /* From 3a769801900dfa956b290f8e4afa39f294e89196 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 18:17:01 +0200 Subject: [PATCH 168/234] references are references --- nextflow_schema.json | 170 +++++++++++++++++++++---------------------- 1 file changed, 85 insertions(+), 85 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 4200450..2f91a4b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -25,32 +25,6 @@ "fa_icon": "fas fa-sort-numeric-down", "help_text": "Used by GATK's PreprocessIntervals. GATK recommends a bin length of 1000 for WGS analysis, and 0 for WES analysis. " }, - "gcnv_exclude_bed": { - "type": "string", - "exists": true, - "format": "path", - "fa_icon": "fas fa-file", - "pattern": "^\\S+\\.bed$", - "description": "Path to directory for a bed file containing regions to be exluded from the analysis.", - "help_text": "If the regions you would like to exclude are in bed format, use this option. If you have an interval_list file, use `exclude_interval_list` parameter instead." 
- }, - "gcnv_exclude_interval_list": { - "type": "string", - "exists": true, - "format": "path", - "fa_icon": "fas fa-file", - "pattern": "^\\S+\\._interval_list$", - "description": "Path to directory for exclude_interval_list file.", - "help_text": "If the regions you would like to exclude are in interval_list format, use this option. If you have a bed file, use `exclude` parameter instead." - }, - "gcnv_mappable_regions": { - "type": "string", - "exists": true, - "description": "Path to Umap single-read mappability track in .bed or .bed.gz format. Overlapping intervals must be merged.", - "format": "file-path", - "fa_icon": "fas fa-file", - "help_text": "Used by GATK's AnnotateIntervals." - }, "gcnv_padding": { "type": "number", "description": "Length (in bp) of the padding regions on each side of the intervals.", @@ -63,15 +37,6 @@ "description": "Name for panel of normals.", "default": "germlinecnvcaller" }, - "gcnv_ploidy_priors": { - "type": "string", - "exists": true, - "format": "file-path", - "mimetype": "text/plain", - "description": "Path to a file containing ploidy priors table.", - "fa_icon": "fas fa-file", - "help_text": "Used by GATK's DeterminGermlineContigPloidy." - }, "gcnv_readcount_format": { "type": "string", "description": "Output file format for count data", @@ -85,32 +50,6 @@ "default": 5000, "fa_icon": "fas fa-sort-numeric-down", "help_text": "Used by GATK's IntervalListTools." - }, - "gcnv_segmental_duplications": { - "type": "string", - "exists": true, - "description": "Path to segmental-duplication track in .bed or .bed.gz format. Overlapping intervals must be merged.", - "format": "file-path", - "fa_icon": "fas fa-file", - "help_text": "Used by GATK's AnnotateIntervals." 
- }, - "gcnv_target_bed": { - "type": "string", - "exists": true, - "format": "path", - "fa_icon": "fas fa-file", - "pattern": "^\\S+\\.bed$", - "description": "Path to directory for target bed file.", - "help_text": "If the regions you would like to analyse are in bed format, use this option. If you have an interval_list file, use `target_interval_list` parameter instead." - }, - "gcnv_target_interval_list": { - "type": "string", - "exists": true, - "format": "path", - "fa_icon": "fas fa-file", - "pattern": "^\\S+\\._interval_list$", - "description": "Path to directory for target interval_list file.", - "help_text": "If the regions you would like to analyse are in interval_list format, use this option. If you have a bed file, use `target_bed` parameter instead." } } }, @@ -171,15 +110,6 @@ "properties": { "mutect2_pon_name": { "type": "string" - }, - "mutect2_target_bed": { - "type": "string", - "description": "Path to target bed file", - "pattern": "^\\S+\\.bed$", - "format": "file-path", - "fa_icon": "fas fa-file", - "exists": true, - "mimetype": "text/plain" } } }, @@ -268,21 +198,6 @@ "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", "fa_icon": "far fa-file-code" }, - "igenomes_ignore": { - "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." 
- }, - "igenomes_base": { - "type": "string", - "format": "directory-path", - "description": "The base path to the igenomes reference files", - "fa_icon": "fas fa-ban", - "hidden": true, - "default": "s3://ngi-igenomes/igenomes/" - }, "dict": { "type": "string", "description": "Path to sequence dictionary file", @@ -300,6 +215,91 @@ "fa_icon": "fas fa-file", "exists": true, "mimetype": "text/plain" + }, + "gcnv_exclude_bed": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-file", + "pattern": "^\\S+\\.bed$", + "description": "Path to directory for a bed file containing regions to be exluded from the analysis.", + "help_text": "If the regions you would like to exclude are in bed format, use this option. If you have an interval_list file, use `exclude_interval_list` parameter instead." + }, + "gcnv_exclude_interval_list": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-file", + "pattern": "^\\S+\\._interval_list$", + "description": "Path to directory for exclude_interval_list file.", + "help_text": "If the regions you would like to exclude are in interval_list format, use this option. If you have a bed file, use `exclude` parameter instead." + }, + "gcnv_mappable_regions": { + "type": "string", + "exists": true, + "description": "Path to Umap single-read mappability track in .bed or .bed.gz format. Overlapping intervals must be merged.", + "format": "file-path", + "fa_icon": "fas fa-file", + "help_text": "Used by GATK's AnnotateIntervals." + }, + "gcnv_ploidy_priors": { + "type": "string", + "exists": true, + "format": "file-path", + "mimetype": "text/plain", + "description": "Path to a file containing ploidy priors table.", + "fa_icon": "fas fa-file", + "help_text": "Used by GATK's DeterminGermlineContigPloidy." + }, + "gcnv_segmental_duplications": { + "type": "string", + "exists": true, + "description": "Path to segmental-duplication track in .bed or .bed.gz format. 
Overlapping intervals must be merged.", + "format": "file-path", + "fa_icon": "fas fa-file", + "help_text": "Used by GATK's AnnotateIntervals." + }, + "gcnv_target_bed": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-file", + "pattern": "^\\S+\\.bed$", + "description": "Path to directory for target bed file.", + "help_text": "If the regions you would like to analyse are in bed format, use this option. If you have an interval_list file, use `target_interval_list` parameter instead." + }, + "gcnv_target_interval_list": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-file", + "pattern": "^\\S+\\._interval_list$", + "description": "Path to directory for target interval_list file.", + "help_text": "If the regions you would like to analyse are in interval_list format, use this option. If you have a bed file, use `target_bed` parameter instead." + }, + "mutect2_target_bed": { + "type": "string", + "description": "Path to target bed file", + "pattern": "^\\S+\\.bed$", + "format": "file-path", + "fa_icon": "fas fa-file", + "exists": true, + "mimetype": "text/plain" + }, + "igenomes_ignore": { + "type": "boolean", + "description": "Do not load the iGenomes reference config.", + "fa_icon": "fas fa-ban", + "hidden": true, + "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." 
+ }, + "igenomes_base": { + "type": "string", + "format": "directory-path", + "description": "The base path to the igenomes reference files", + "fa_icon": "fas fa-ban", + "hidden": true, + "default": "s3://ngi-igenomes/igenomes/" } } }, From da838e9292cfb41085d8f96c1789034dc3821e1b Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 18:29:43 +0200 Subject: [PATCH 169/234] more code polish --- conf/igenomes.config | 1 + conf/test.config | 1 - tests/gens_pon.config | 8 -------- tests/gens_pon.nf.test | 3 --- tests/germlinecnvcaller_cohort.nf.test | 4 ---- 5 files changed, 1 insertion(+), 16 deletions(-) diff --git a/conf/igenomes.config b/conf/igenomes.config index 75de6a5..4c2b648 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -472,6 +472,7 @@ params { dict = "${params.igenomes_base}/homo_sapiens/genome/chr21/sequence/genome.dict" fai = "${params.igenomes_base}/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" mutect2_target_bed = "${params.igenomes_base}/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + gcnv_ploidy_priors = "${params.igenomes_base}/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" } } } diff --git a/conf/test.config b/conf/test.config index 9aa01d3..b372be7 100644 --- a/conf/test.config +++ b/conf/test.config @@ -30,7 +30,6 @@ params { //Germlinecnvcaller options gcnv_scatter_content = 2 - gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" // Small reference genome genome = 'GRCh38.chr21.testdata' diff --git a/tests/gens_pon.config b/tests/gens_pon.config index c026504..e89e138 100644 --- a/tests/gens_pon.config +++ b/tests/gens_pon.config @@ -1,5 +1,4 @@ process { - withName: 'GATK4_CREATEREADCOUNTPANELOFNORMALS' { ext.args = "--minimum-interval-median-percentile 10 --number-of-eigensamples 2" } @@ -9,13 +8,6 @@ process { profiles { docker { - docker.enabled = true - conda.enabled = false - 
singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false docker.runOptions = '-u root' } } diff --git a/tests/gens_pon.nf.test b/tests/gens_pon.nf.test index 82837c8..ae112b0 100644 --- a/tests/gens_pon.nf.test +++ b/tests/gens_pon.nf.test @@ -10,9 +10,6 @@ nextflow_pipeline { when { params { - genome = 'null' - igenomes_ignore = true - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" gens_bin_length = 100 gens_pon_name = 'gens_pon' gens_readcount_format = "TSV" diff --git a/tests/germlinecnvcaller_cohort.nf.test b/tests/germlinecnvcaller_cohort.nf.test index ede18ee..8e656ad 100644 --- a/tests/germlinecnvcaller_cohort.nf.test +++ b/tests/germlinecnvcaller_cohort.nf.test @@ -10,11 +10,7 @@ nextflow_pipeline { when { params { - genome = 'null' - igenomes_ignore = true - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" gcnv_model_name = 'cohort' - gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" gcnv_scatter_content = 2 input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" outdir = "$outputDir" From 366f7107e3d771596a3199149f8ef28c6476f4b2 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 18:46:15 +0200 Subject: [PATCH 170/234] better usage of igenomes_base --- conf/igenomes.config | 10 +++++----- conf/test.config | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/conf/igenomes.config b/conf/igenomes.config index 4c2b648..c7f030b 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -468,11 +468,11 @@ params { star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" } 'GRCh38.chr21.testdata' { - fasta = "${params.igenomes_base}/homo_sapiens/genome/chr21/sequence/genome.fasta" - 
dict = "${params.igenomes_base}/homo_sapiens/genome/chr21/sequence/genome.dict" - fai = "${params.igenomes_base}/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" - mutect2_target_bed = "${params.igenomes_base}/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" - gcnv_ploidy_priors = "${params.igenomes_base}/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" + fasta = "${params.igenomes_base}/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" + dict = "${params.igenomes_base}/genomics/homo_sapiens/genome/chr21/sequence/genome.dict" + fai = "${params.igenomes_base}/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" + mutect2_target_bed = "${params.igenomes_base}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + gcnv_ploidy_priors = "${params.igenomes_base}/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" } } } diff --git a/conf/test.config b/conf/test.config index b372be7..eda3efc 100644 --- a/conf/test.config +++ b/conf/test.config @@ -33,5 +33,4 @@ params { // Small reference genome genome = 'GRCh38.chr21.testdata' - igenomes_base = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics" } From 37dcb36811be8010112268d1301cc56f7f17b5c7 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 18:47:02 +0200 Subject: [PATCH 171/234] fix gens and germlinecnvcaller tests --- tests/gens_pon.nf.test | 16 ++++++++++------ tests/germlinecnvcaller_cohort.nf.test | 14 +++++++++----- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/tests/gens_pon.nf.test b/tests/gens_pon.nf.test index ae112b0..0d14d26 100644 --- a/tests/gens_pon.nf.test +++ b/tests/gens_pon.nf.test @@ -10,12 +10,16 @@ nextflow_pipeline { when { params { - gens_bin_length = 100 - gens_pon_name = 'gens_pon' - gens_readcount_format = "TSV" - input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" - outdir = "$outputDir" - tools = 'gens' + igenomes_ignore = true + genome = null + 
modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + fasta = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.fasta" + gens_bin_length = 100 + gens_pon_name = 'gens_pon' + gens_readcount_format = "TSV" + input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" + outdir = "$outputDir" + tools = 'gens' } } diff --git a/tests/germlinecnvcaller_cohort.nf.test b/tests/germlinecnvcaller_cohort.nf.test index 8e656ad..6136dfe 100644 --- a/tests/germlinecnvcaller_cohort.nf.test +++ b/tests/germlinecnvcaller_cohort.nf.test @@ -10,11 +10,15 @@ nextflow_pipeline { when { params { - gcnv_model_name = 'cohort' - gcnv_scatter_content = 2 - input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" - outdir = "$outputDir" - tools = 'germlinecnvcaller' + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + fasta = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.fasta" + dict = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.dict" + fai = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.fasta.fai" + gcnv_model_name = 'cohort' + gcnv_scatter_content = 2 + input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" + outdir = "$outputDir" + tools = 'germlinecnvcaller' } } From ba89def56cea5ae23b09750dc2b7c9926dc371d0 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 18:49:49 +0200 Subject: [PATCH 172/234] update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7679a90..3a8f2e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,8 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#35](https://github.com/nf-core/createpanelrefs/pull/35) - Template update for nf-core/tools v3.0.2 - [#35](https://github.com/nf-core/createpanelrefs/pull/35) - Improve pipeline level tests - 
[#48](https://github.com/nf-core/createpanelrefs/pull/48) - Improve CI (early failure + automatic nf-test shards + [RunsOn](https://runs-on.com/)) +- [#49](https://github.com/nf-core/createpanelrefs/pull/49) - Improve CI (Test Mutect2 with CRAM + better usage of test references) +- [#49](https://github.com/nf-core/createpanelrefs/pull/49) - Move all parameters in the schema that are references in the references section ### `Fixed` From 5f8be571c3e19a7ee0d67be3e4a1f0b99f651444 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 19:00:26 +0200 Subject: [PATCH 173/234] fix igenomes --- tests/default.nf.test | 5 +++-- tests/gens_pon.nf.test | 2 +- tests/germlinecnvcaller_cohort.nf.test | 3 ++- tests/mutect2.nf.test | 6 ++++-- tests/nextflow.config | 4 ---- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/default.nf.test b/tests/default.nf.test index d3497a5..1d977ab 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -9,8 +9,9 @@ nextflow_pipeline { when { params { - outdir = "$outputDir" - input = "${projectDir}/tests/csv/1.0.0/bam_cram.csv" + igenomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data' + input = "${projectDir}/tests/csv/1.0.0/bam_cram.csv" + outdir = "$outputDir" } } diff --git a/tests/gens_pon.nf.test b/tests/gens_pon.nf.test index 0d14d26..f04d069 100644 --- a/tests/gens_pon.nf.test +++ b/tests/gens_pon.nf.test @@ -10,8 +10,8 @@ nextflow_pipeline { when { params { - igenomes_ignore = true genome = null + igenomes_ignore = true modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' fasta = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.fasta" gens_bin_length = 100 diff --git a/tests/germlinecnvcaller_cohort.nf.test b/tests/germlinecnvcaller_cohort.nf.test index 6136dfe..e921e4d 100644 --- a/tests/germlinecnvcaller_cohort.nf.test +++ b/tests/germlinecnvcaller_cohort.nf.test @@ -10,10 +10,11 @@ 
nextflow_pipeline { when { params { + igenomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data' modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - fasta = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.fasta" dict = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.dict" fai = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.fasta.fai" + fasta = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.fasta" gcnv_model_name = 'cohort' gcnv_scatter_content = 2 input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" diff --git a/tests/mutect2.nf.test b/tests/mutect2.nf.test index 45dc250..090be58 100644 --- a/tests/mutect2.nf.test +++ b/tests/mutect2.nf.test @@ -8,9 +8,10 @@ nextflow_pipeline { when { params { + igenomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data' + mutect2_pon_name = 'test' outdir = "$outputDir" tools = 'mutect2' - mutect2_pon_name = 'test' } } @@ -39,10 +40,11 @@ nextflow_pipeline { when { params { + igenomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data' input = "${projectDir}/tests/csv/1.0.0/cram.csv" + mutect2_pon_name = 'test' outdir = "$outputDir" tools = 'mutect2' - mutect2_pon_name = 'test' } } diff --git a/tests/nextflow.config b/tests/nextflow.config index 393cbed..afea9a5 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -9,7 +9,3 @@ // Should resolve issue with accessing s3 from the runners aws.client.anonymous = true - -// Should take care of all basepath -params.modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' -params.igenomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data' From bee6cb87031220bc4545d2e18ebdd38ffdf5468b Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 19:08:19 +0200 Subject: 
[PATCH 174/234] fix and simplify igenomes_base usage --- conf/test.config | 3 ++- tests/default.nf.test | 5 ++--- tests/germlinecnvcaller_cohort.nf.test | 1 - tests/mutect2.nf.test | 2 -- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/conf/test.config b/conf/test.config index eda3efc..079facb 100644 --- a/conf/test.config +++ b/conf/test.config @@ -32,5 +32,6 @@ params { gcnv_scatter_content = 2 // Small reference genome - genome = 'GRCh38.chr21.testdata' + genome = 'GRCh38.chr21.testdata' + igenomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data' } diff --git a/tests/default.nf.test b/tests/default.nf.test index 1d977ab..64e2159 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -9,9 +9,8 @@ nextflow_pipeline { when { params { - igenomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data' - input = "${projectDir}/tests/csv/1.0.0/bam_cram.csv" - outdir = "$outputDir" + input = "${projectDir}/tests/csv/1.0.0/bam_cram.csv" + outdir = "$outputDir" } } diff --git a/tests/germlinecnvcaller_cohort.nf.test b/tests/germlinecnvcaller_cohort.nf.test index e921e4d..4db65f7 100644 --- a/tests/germlinecnvcaller_cohort.nf.test +++ b/tests/germlinecnvcaller_cohort.nf.test @@ -10,7 +10,6 @@ nextflow_pipeline { when { params { - igenomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data' modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' dict = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.dict" fai = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.fasta.fai" diff --git a/tests/mutect2.nf.test b/tests/mutect2.nf.test index 090be58..cae0f59 100644 --- a/tests/mutect2.nf.test +++ b/tests/mutect2.nf.test @@ -8,7 +8,6 @@ nextflow_pipeline { when { params { - igenomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data' mutect2_pon_name = 'test' 
outdir = "$outputDir" tools = 'mutect2' @@ -40,7 +39,6 @@ nextflow_pipeline { when { params { - igenomes_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data' input = "${projectDir}/tests/csv/1.0.0/cram.csv" mutect2_pon_name = 'test' outdir = "$outputDir" From 9d27e7a85a0c6ab54c8181b59285ed22791a85cd Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 19:43:18 +0200 Subject: [PATCH 175/234] more improvments --- conf/igenomes.config | 5 +++++ tests/gens_pon.nf.test | 17 +++++++---------- tests/germlinecnvcaller_cohort.nf.test | 15 ++++++--------- tests/germlinecnvcaller_cohort.nf.test.snap | 14 ++------------ 4 files changed, 20 insertions(+), 31 deletions(-) diff --git a/conf/igenomes.config b/conf/igenomes.config index c7f030b..d0f5074 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -472,6 +472,11 @@ params { dict = "${params.igenomes_base}/genomics/homo_sapiens/genome/chr21/sequence/genome.dict" fai = "${params.igenomes_base}/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" mutect2_target_bed = "${params.igenomes_base}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + } + 'GRCh38.chr22.testdata' { + fasta = "${params.igenomes_base}/genomics/homo_sapiens/genome/genome.fasta" + dict = "${params.igenomes_base}/genomics/homo_sapiens/genome/genome.dict" + fai = "${params.igenomes_base}/genomics/homo_sapiens/genome/genome.fasta.fai" gcnv_ploidy_priors = "${params.igenomes_base}/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" } } diff --git a/tests/gens_pon.nf.test b/tests/gens_pon.nf.test index f04d069..d5d5205 100644 --- a/tests/gens_pon.nf.test +++ b/tests/gens_pon.nf.test @@ -10,16 +10,13 @@ nextflow_pipeline { when { params { - genome = null - igenomes_ignore = true - modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - fasta = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.fasta" - 
gens_bin_length = 100 - gens_pon_name = 'gens_pon' - gens_readcount_format = "TSV" - input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" - outdir = "$outputDir" - tools = 'gens' + genome = 'GRCh38.chr22.testdata' + gens_bin_length = 100 + gens_pon_name = 'gens_pon' + gens_readcount_format = "TSV" + input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" + outdir = "$outputDir" + tools = 'gens' } } diff --git a/tests/germlinecnvcaller_cohort.nf.test b/tests/germlinecnvcaller_cohort.nf.test index 4db65f7..99ad00c 100644 --- a/tests/germlinecnvcaller_cohort.nf.test +++ b/tests/germlinecnvcaller_cohort.nf.test @@ -10,15 +10,12 @@ nextflow_pipeline { when { params { - modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - dict = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.dict" - fai = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.fasta.fai" - fasta = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.fasta" - gcnv_model_name = 'cohort' - gcnv_scatter_content = 2 - input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" - outdir = "$outputDir" - tools = 'germlinecnvcaller' + genome = 'GRCh38.chr22.testdata' + gcnv_model_name = 'cohort' + gcnv_scatter_content = 2 + input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" + outdir = "$outputDir" + tools = 'germlinecnvcaller' } } diff --git a/tests/germlinecnvcaller_cohort.nf.test.snap b/tests/germlinecnvcaller_cohort.nf.test.snap index 2a525ef..c24d518 100644 --- a/tests/germlinecnvcaller_cohort.nf.test.snap +++ b/tests/germlinecnvcaller_cohort.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test --tools germlinecnvcaller --input tests/csv/1.0.0/bam_sorted.csv --gcnv_model_name cohort --gcnv_ploidy_priors --gcnv_scatter_content 2": { "content": [ - 12, + 10, { "GATK4_ANNOTATEINTERVALS": { "gatk4": "4.5.0.0" @@ -9,9 +9,6 @@ "GATK4_COLLECTREADCOUNTS": { "gatk4": "4.5.0.0" }, - 
"GATK4_CREATESEQUENCEDICTIONARY": { - "gatk4": "4.5.0.0" - }, "GATK4_DETERMINEGERMLINECONTIGPLOIDY": { "gatk4": "4.5.0.0" }, @@ -27,9 +24,6 @@ "GATK4_PREPROCESSINTERVALS": { "gatk4": "4.5.0.0" }, - "SAMTOOLS_FAIDX": { - "samtools": 1.21 - }, "Workflow": { "nf-core/createpanelrefs": "v1.0dev" } @@ -142,9 +136,6 @@ "germlinecnvcaller/readcounts", "germlinecnvcaller/readcounts/sample1.hdf5", "germlinecnvcaller/readcounts/sample2.hdf5", - "germlinecnvcaller/references", - "germlinecnvcaller/references/genome.dict", - "germlinecnvcaller/references/genome.fasta.fai", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -179,7 +170,6 @@ "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", "interval_list.tsv:md5,9f7718372e87e69aeb837678755a9971", - "genome.fasta.fai:md5,3520cd30e1b100e55f578db9c855f685", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], @@ -187,6 +177,6 @@ "nf-test": "0.9.2", "nextflow": "24.10.5" }, - "timestamp": "2025-04-09T15:03:29.439388615" + "timestamp": "2025-04-11T19:42:40.668405904" } } \ No newline at end of file From e46a4f4df08c4cc0b7478eb6538ca665bf43ff18 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 19:54:14 +0200 Subject: [PATCH 176/234] update snapshot --- tests/gens_pon.nf.test.snap | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/tests/gens_pon.nf.test.snap b/tests/gens_pon.nf.test.snap index 302a6f8..c597f8b 100644 --- a/tests/gens_pon.nf.test.snap +++ b/tests/gens_pon.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test --tools gens --input tests/csv/1.0.0/bam_sorted.csv --gens_bin_length 100 --gens_pon_name gens_pon --gens_readcount_format TSV": { "content": [ - 7, + 5, { "GATK4_COLLECTREADCOUNTS": { "gatk4": "4.5.0.0" @@ -9,15 +9,9 @@ "GATK4_CREATEREADCOUNTPANELOFNORMALS": { "gatk4": "4.5.0.0" }, - "GATK4_CREATESEQUENCEDICTIONARY": { - "gatk4": "4.5.0.0" - }, 
"GATK4_PREPROCESSINTERVALS": { "gatk4": "4.5.0.0" }, - "SAMTOOLS_FAIDX": { - "samtools": 1.21 - }, "Workflow": { "nf-core/createpanelrefs": "v1.0dev" } @@ -31,9 +25,6 @@ "gens_pon/readcounts", "gens_pon/readcounts/sample1.tsv", "gens_pon/readcounts/sample2.tsv", - "gens_pon/references", - "gens_pon/references/genome.dict", - "gens_pon/references/genome.fasta.fai", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -48,7 +39,6 @@ [ "sample1.tsv:md5,bfceab35109b04317f38f9cc1cde71ca", "sample2.tsv:md5,7141d08cdc26f6057557be9e23ef4365", - "genome.fasta.fai:md5,3520cd30e1b100e55f578db9c855f685", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], @@ -56,6 +46,6 @@ "nf-test": "0.9.2", "nextflow": "24.10.5" }, - "timestamp": "2025-04-09T14:48:02.583474325" + "timestamp": "2025-04-11T19:53:59.716710047" } } \ No newline at end of file From 91ca88cc786c3061466d6867a3373ef1bd4d7d7e Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 11 Apr 2025 20:16:32 +0200 Subject: [PATCH 177/234] alphabetical order --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 72f70d0..40e3386 100644 --- a/main.nf +++ b/main.nf @@ -15,9 +15,9 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = getGenomeAttribute('fasta') params.dict = getGenomeAttribute('dict') params.fai = getGenomeAttribute('fai') +params.fasta = getGenomeAttribute('fasta') params.gcnv_exclude_bed = getGenomeAttribute('gcnv_exclude_bed') params.gcnv_exclude_interval_list = getGenomeAttribute('gcnv_exclude_interval_list') params.gcnv_mappable_regions = getGenomeAttribute('gcnv_mappable_regions') From 391bf5d074d5d3a8235e75816aa15128e435e165 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Mon, 14 Apr 2025 18:38:42 +0200 Subject: [PATCH 178/234] improve some references usage --- conf/modules/gens_pon.config | 29 -- conf/modules/germlinecnvcaller_cohort.config | 18 - 
conf/modules/prepare_genome.config | 41 +++ nextflow.config | 1 + subworkflows/local/gens_pon/main.nf | 136 +++---- .../local/germlinecnvcaller_cohort/main.nf | 348 +++++++++--------- subworkflows/local/prepare_genome/main.nf | 57 +++ tests/.nftignore | 4 +- tests/gens_pon.config | 1 - tests/gens_pon.nf.test | 1 + tests/gens_pon.nf.test.snap | 10 +- tests/mutect2.nf.test | 3 + workflows/createpanelrefs.nf | 283 +++++++------- 13 files changed, 487 insertions(+), 445 deletions(-) create mode 100644 conf/modules/prepare_genome.config create mode 100644 subworkflows/local/prepare_genome/main.nf diff --git a/conf/modules/gens_pon.config b/conf/modules/gens_pon.config index 3c10be7..e2ac2ae 100644 --- a/conf/modules/gens_pon.config +++ b/conf/modules/gens_pon.config @@ -18,35 +18,6 @@ process { ] } - withName: '.*GENS_PON:SAMTOOLS_FAIDX' { - ext.when = { params.fai.equals(null) } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/gens_pon/references" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*GENS_PON:GATK4_CREATESEQUENCEDICTIONARY' { - ext.when = { params.dict.equals(null) } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/gens_pon/references" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*GENS_PON:GATK4_PREPROCESSINTERVALS' { - ext.args = { ["--imr OVERLAPPING_ONLY", - "--bin-length ${params.gens_bin_length}"].join(" ") - } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/gens_pon/intervals" }, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - withName: '.*GENS_PON:GATK4_COLLECTREADCOUNTS' { ext.args = {"--format ${params.gens_readcount_format} --imr OVERLAPPING_ONLY"} publishDir = [ diff --git a/conf/modules/germlinecnvcaller_cohort.config b/conf/modules/germlinecnvcaller_cohort.config index ea074a8..8a4d21e 100644 --- a/conf/modules/germlinecnvcaller_cohort.config +++ b/conf/modules/germlinecnvcaller_cohort.config @@ -18,24 +18,6 @@ process { ] } - withName: '.*GERMLINECNVCALLER_COHORT:SAMTOOLS_FAIDX' { - ext.when = { params.fai.equals(null) } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/germlinecnvcaller/references" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*GERMLINECNVCALLER_COHORT:GATK4_CREATESEQUENCEDICTIONARY' { - ext.when = { params.dict.equals(null) } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/germlinecnvcaller/references" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*GERMLINECNVCALLER_COHORT:GATK4_INDEXFEATUREFILE_MAPPABILITY' { ext.when = { !params.gcnv_mappable_regions.equals(null) } } diff --git a/conf/modules/prepare_genome.config b/conf/modules/prepare_genome.config new file mode 100644 index 0000000..7f3b851 --- /dev/null +++ b/conf/modules/prepare_genome.config @@ -0,0 +1,41 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+---------------------------------------------------------------------------------------- +*/ + +process { + + withName: 'GATK4_CREATESEQUENCEDICTIONARY' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/references" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'GATK4_PREPROCESSINTERVALS' { + ext.args = { ["--imr OVERLAPPING_ONLY", + "--bin-length ${params.gens_bin_length}"].join(" ") + } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/references/intervals/gens_pon/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'SAMTOOLS_FAIDX' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/references" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/nextflow.config b/nextflow.config index d607448..e1770df 100644 --- a/nextflow.config +++ b/nextflow.config @@ -320,3 +320,4 @@ includeConfig 'conf/modules/cnvkit.config' includeConfig 'conf/modules/germlinecnvcaller_cohort.config' includeConfig 'conf/modules/gens_pon.config' includeConfig 'conf/modules/mutect2.config' +includeConfig 'conf/modules/prepare_genome.config' diff --git a/subworkflows/local/gens_pon/main.nf b/subworkflows/local/gens_pon/main.nf index b82e2ba..82b2250 100644 --- a/subworkflows/local/gens_pon/main.nf +++ b/subworkflows/local/gens_pon/main.nf @@ -1,91 +1,61 @@ -include { GATK4_COLLECTREADCOUNTS } from '../../../modules/nf-core/gatk4/collectreadcounts/main' -include { GATK4_CREATEREADCOUNTPANELOFNORMALS } from '../../../modules/nf-core/gatk4/createreadcountpanelofnormals/main' -include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary/main' -include { GATK4_PREPROCESSINTERVALS } from '../../../modules/nf-core/gatk4/preprocessintervals/main' -include { SAMTOOLS_FAIDX } from 
'../../../modules/nf-core/samtools/faidx/main' -include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { GATK4_COLLECTREADCOUNTS } from '../../../modules/nf-core/gatk4/collectreadcounts' +include { GATK4_CREATEREADCOUNTPANELOFNORMALS } from '../../../modules/nf-core/gatk4/createreadcountpanelofnormals' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index' workflow GENS_PON { take: - ch_user_dict // channel: [optional] [ val(meta), path(dict) ] - ch_user_fai // channel: [optional] [ val(meta), path(fai) ] - ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] - val_pon_name // string: [optional] name for panel of normals + ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] + val_pon_name // string: [optional] name for panel of normals + ch_dict // channel: [optional] [ val(meta), path(dict) ] + ch_fai // channel: [optional] [ val(meta), path(fai) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_interval_list // channel: [mandatory] [ val(meta), path(interval_list) ] main: - ch_versions = Channel.empty() - - // - // Prepare references - // - SAMTOOLS_FAIDX ( ch_fasta, [[:],[]] ) - - GATK4_CREATESEQUENCEDICTIONARY ( ch_fasta ) - - ch_user_dict - .mix(GATK4_CREATESEQUENCEDICTIONARY.out.dict) - .collect() - .set { ch_dict } - - ch_user_fai - .mix(SAMTOOLS_FAIDX.out.fai) - .collect() - .set { ch_fai } - - GATK4_PREPROCESSINTERVALS ( ch_fasta, ch_fai, ch_dict, [[:],[]], [[:],[]] ) - - // - // Filter out files that lack indices, and generate them - // - ch_input - .branch { meta, alignment, index -> - alignment_with_index: index.size() > 0 - return [meta, alignment, index] - alignment_without_index: index.size() == 0 - return [meta, alignment] - } - .set { ch_for_mix } - - SAMTOOLS_INDEX ( ch_for_mix.alignment_without_index ) - - SAMTOOLS_INDEX.out.bai - .mix(SAMTOOLS_INDEX.out.crai) - 
.set { ch_index } - - // - // Collect alignment files and their indices - // - ch_for_mix.alignment_without_index - .join(ch_index) - .mix(ch_for_mix.alignment_with_index) - .combine(GATK4_PREPROCESSINTERVALS.out.interval_list.map{it -> it[1]}) - .set {ch_readcounts_in} - - // - // Collect read counts, and generate models - // - GATK4_COLLECTREADCOUNTS ( ch_readcounts_in, ch_fasta, ch_fai, ch_dict ) - - GATK4_COLLECTREADCOUNTS.out.tsv - .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) - .collect { it[1] } - .map { it -> - return [[id:val_pon_name], it] - } - .set { ch_readcounts_out } - - GATK4_CREATEREADCOUNTPANELOFNORMALS ( ch_readcounts_out ) - - ch_versions = ch_versions.mix(GATK4_COLLECTREADCOUNTS.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_CREATEREADCOUNTPANELOFNORMALS.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) - ch_versions = ch_versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + versions = Channel.empty() + + // Filter out files that lack indices, and generate them + ch_input + .branch { meta, alignment, index -> + alignment_with_index: index.size() > 0 + return [meta, alignment, index] + alignment_without_index: index.size() == 0 + return [meta, alignment] + } + .set { ch_for_mix } + + SAMTOOLS_INDEX(ch_for_mix.alignment_without_index) + + SAMTOOLS_INDEX.out.bai + .mix(SAMTOOLS_INDEX.out.crai) + .set { ch_index } + + // Collect alignment files and their indices + ch_for_mix.alignment_without_index + .join(ch_index) + .mix(ch_for_mix.alignment_with_index) + .combine(ch_interval_list.map { it -> it[1] }) + .set { ch_readcounts_in } + + // Collect read counts, and generate models + GATK4_COLLECTREADCOUNTS(ch_readcounts_in, ch_fasta, ch_fai, ch_dict) + + GATK4_COLLECTREADCOUNTS.out.tsv + .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) + .collect { it[1] } + .map { it -> + 
return [[id: val_pon_name], it] + } + .set { ch_readcounts_out } + + GATK4_CREATEREADCOUNTPANELOFNORMALS(ch_readcounts_out) + + versions = versions.mix(GATK4_COLLECTREADCOUNTS.out.versions) + versions = versions.mix(GATK4_CREATEREADCOUNTPANELOFNORMALS.out.versions) + versions = versions.mix(SAMTOOLS_INDEX.out.versions) emit: - genspon = GATK4_CREATEREADCOUNTPANELOFNORMALS.out.pon - readcounts = ch_readcounts_out - versions = ch_versions + genspon = GATK4_CREATEREADCOUNTPANELOFNORMALS.out.pon + readcounts = ch_readcounts_out + versions } diff --git a/subworkflows/local/germlinecnvcaller_cohort/main.nf b/subworkflows/local/germlinecnvcaller_cohort/main.nf index df8e869..129f954 100644 --- a/subworkflows/local/germlinecnvcaller_cohort/main.nf +++ b/subworkflows/local/germlinecnvcaller_cohort/main.nf @@ -1,190 +1,174 @@ -include { GATK4_ANNOTATEINTERVALS } from '../../../modules/nf-core/gatk4/annotateintervals/main' -include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_TARGETS } from '../../../modules/nf-core/gatk4/bedtointervallist/main' -include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_EXCLUDE } from '../../../modules/nf-core/gatk4/bedtointervallist/main' -include { GATK4_COLLECTREADCOUNTS } from '../../../modules/nf-core/gatk4/collectreadcounts/main' -include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary/main' -include { GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../../modules/nf-core/gatk4/determinegermlinecontigploidy/main' -include { GATK4_FILTERINTERVALS } from '../../../modules/nf-core/gatk4/filterintervals/main' -include { GATK4_GERMLINECNVCALLER } from '../../../modules/nf-core/gatk4/germlinecnvcaller/main' -include { GATK4_INDEXFEATUREFILE as GATK4_INDEXFEATUREFILE_MAPPABILITY } from '../../../modules/nf-core/gatk4/indexfeaturefile/main' -include { GATK4_INDEXFEATUREFILE as GATK4_INDEXFEATUREFILE_SEGDUP } from '../../../modules/nf-core/gatk4/indexfeaturefile/main' -include { 
GATK4_INTERVALLISTTOOLS } from '../../../modules/nf-core/gatk4/intervallisttools/main' -include { GATK4_PREPROCESSINTERVALS } from '../../../modules/nf-core/gatk4/preprocessintervals/main' -include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' -include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { GATK4_ANNOTATEINTERVALS } from '../../../modules/nf-core/gatk4/annotateintervals' +include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_TARGETS } from '../../../modules/nf-core/gatk4/bedtointervallist' +include { GATK4_BEDTOINTERVALLIST as GATK4_BEDTOINTERVALLIST_EXCLUDE } from '../../../modules/nf-core/gatk4/bedtointervallist' +include { GATK4_COLLECTREADCOUNTS } from '../../../modules/nf-core/gatk4/collectreadcounts' +include { GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../../modules/nf-core/gatk4/determinegermlinecontigploidy' +include { GATK4_FILTERINTERVALS } from '../../../modules/nf-core/gatk4/filterintervals' +include { GATK4_GERMLINECNVCALLER } from '../../../modules/nf-core/gatk4/germlinecnvcaller' +include { GATK4_INDEXFEATUREFILE as GATK4_INDEXFEATUREFILE_MAPPABILITY } from '../../../modules/nf-core/gatk4/indexfeaturefile' +include { GATK4_INDEXFEATUREFILE as GATK4_INDEXFEATUREFILE_SEGDUP } from '../../../modules/nf-core/gatk4/indexfeaturefile' +include { GATK4_INTERVALLISTTOOLS } from '../../../modules/nf-core/gatk4/intervallisttools' +include { GATK4_PREPROCESSINTERVALS } from '../../../modules/nf-core/gatk4/preprocessintervals' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index' workflow GERMLINECNVCALLER_COHORT { take: - ch_user_dict // channel: [optional] [ val(meta), path(dict) ] - ch_user_fai // channel: [optional] [ val(meta), path(fai) ] - ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] - ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] - ch_mappable_regions 
// channel: [optional] [ val(meta), path(bed) ] - ch_segmental_duplications // channel: [optional] [ val(meta), path(bed) ] - ch_target_bed // channel: [optional] [ val(meta), path(bed) ] - ch_user_target_interval_list // channel: [optional] [ val(meta), path(intervals) ] - ch_exclude_bed // channel: [optional] [ val(meta), path(bed) ] - ch_user_exclude_interval_list // channel: [optional] [ val(meta), path(intervals) ] - val_pon_name // string: [optional] name for panel of normals + ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] + val_pon_name // string: [optional] name for panel of normals + ch_dict // channel: [optional] [ val(meta), path(dict) ] + ch_exclude_bed // channel: [optional] [ val(meta), path(bed) ] + ch_fai // channel: [optional] [ val(meta), path(fai) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_mappable_regions // channel: [optional] [ val(meta), path(bed) ] + ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] + ch_segmental_duplications // channel: [optional] [ val(meta), path(bed) ] + ch_target_bed // channel: [optional] [ val(meta), path(bed) ] + ch_user_exclude_interval_list // channel: [optional] [ val(meta), path(intervals) ] + ch_user_target_interval_list // channel: [optional] [ val(meta), path(intervals) ] main: - ch_versions = Channel.empty() - - // - // Prepare references - // - SAMTOOLS_FAIDX ( ch_fasta, [[:],[]] ) - - GATK4_CREATESEQUENCEDICTIONARY ( ch_fasta ) - - GATK4_INDEXFEATUREFILE_MAPPABILITY ( ch_mappable_regions ) - - GATK4_INDEXFEATUREFILE_SEGDUP ( ch_segmental_duplications ) - - ch_user_dict - .mix(GATK4_CREATESEQUENCEDICTIONARY.out.dict) - .collect() - .set { ch_dict } - - ch_user_fai - .mix(SAMTOOLS_FAIDX.out.fai) - .collect() - .set { ch_fai } - - GATK4_BEDTOINTERVALLIST_TARGETS (ch_target_bed, ch_dict) //Runs for wes analysis, when target_bed file is provided instead of target_interval_list - GATK4_BEDTOINTERVALLIST_EXCLUDE (ch_exclude_bed, ch_dict) //Runs for 
wes analysis, when exclude_bed file is provided instead of target_interval_list - - ch_user_target_interval_list - .combine(GATK4_BEDTOINTERVALLIST_TARGETS.out.interval_list.ifEmpty(null)) - .branch { it -> - intervallistfrompath: it[2].equals(null) - return [it[0], it[1]] - intervallistfrombed: !(it[2].equals(null)) - return [it[2], it[3]] - } - .set { ch_targets_for_mix } - - ch_targets_for_mix.intervallistfrompath.mix(ch_targets_for_mix.intervallistfrombed) - .collect() - .set {ch_target_interval_list} - - ch_user_exclude_interval_list - .combine(GATK4_BEDTOINTERVALLIST_EXCLUDE.out.interval_list.ifEmpty(null)) - .branch { it -> - intervallistfrompath: it[2].equals(null) - return [it[0], it[1]] - intervallistfrombed: !(it[2].equals(null)) - return [it[2], it[3]] - } - .set { ch_exclude_for_mix } - - ch_exclude_for_mix.intervallistfrompath.mix(ch_exclude_for_mix.intervallistfrombed) - .collect() - .set { ch_exclude_interval_list } - - GATK4_PREPROCESSINTERVALS ( ch_fasta, - ch_fai, - ch_dict, - ch_target_interval_list, - ch_exclude_interval_list) - - GATK4_ANNOTATEINTERVALS ( GATK4_PREPROCESSINTERVALS.out.interval_list, - ch_fasta, - ch_fai, - ch_dict, - ch_mappable_regions, - GATK4_INDEXFEATUREFILE_MAPPABILITY.out.index.ifEmpty([[:],[]]), - ch_segmental_duplications, - GATK4_INDEXFEATUREFILE_SEGDUP.out.index.ifEmpty([[:],[]])) - - // - // Filter out files that lack indices, and generate them - // - ch_input - .branch { meta, alignment, index -> - alignment_with_index: index.size() > 0 - return [meta, alignment, index] - alignment_without_index: index.size() == 0 - return [meta, alignment] - } - .set { ch_for_mix } - - SAMTOOLS_INDEX ( ch_for_mix.alignment_without_index ) - - SAMTOOLS_INDEX.out.bai - .mix(SAMTOOLS_INDEX.out.crai) - .set { ch_index } - - // - // Collect alignment files and their indices - // - ch_for_mix.alignment_without_index - .join(ch_index) - .mix(ch_for_mix.alignment_with_index) - .combine(GATK4_PREPROCESSINTERVALS.out.interval_list.map{it -> 
it[1]}) - .set {ch_readcounts_in} - - // - // Collect read counts, and generate models - // - GATK4_COLLECTREADCOUNTS ( ch_readcounts_in, - ch_fasta, - ch_fai, - ch_dict ) - - GATK4_COLLECTREADCOUNTS.out.tsv - .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) - .collect { it[1] } - .map {tsv -> [[id:val_pon_name],tsv]} - .set { ch_readcounts_out } - - - GATK4_FILTERINTERVALS ( GATK4_PREPROCESSINTERVALS.out.interval_list, - ch_readcounts_out, - GATK4_ANNOTATEINTERVALS.out.annotated_intervals ) - - GATK4_INTERVALLISTTOOLS ( GATK4_FILTERINTERVALS.out.interval_list ) - .interval_list - .map {meta, it -> it} - .flatten() - .set { ch_intervallist_out } - - ch_readcounts_out - .combine(GATK4_FILTERINTERVALS.out.interval_list) - .map{ meta, counts, meta2, il -> [meta, counts, il, []] } - .set {ch_contigploidy_in} - - GATK4_DETERMINEGERMLINECONTIGPLOIDY ( ch_contigploidy_in, - [[:],[]], - ch_ploidy_priors ) - - ch_readcounts_out - .combine(ch_intervallist_out) - .combine(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.calls) - .map{ meta, counts, il, meta2, calls -> [meta + [id:il.baseName], counts, il, calls, []] } - .set {ch_cnvcaller_in} - - GATK4_GERMLINECNVCALLER ( ch_cnvcaller_in ) - - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) - ch_versions = ch_versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) - ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST_TARGETS.out.versions) - ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST_EXCLUDE.out.versions) - ch_versions = ch_versions.mix(GATK4_COLLECTREADCOUNTS.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_ANNOTATEINTERVALS.out.versions) - ch_versions = ch_versions.mix(GATK4_FILTERINTERVALS.out.versions) - ch_versions = ch_versions.mix(GATK4_INDEXFEATUREFILE_MAPPABILITY.out.versions) - ch_versions = ch_versions.mix(GATK4_INDEXFEATUREFILE_SEGDUP.out.versions) - 
ch_versions = ch_versions.mix(GATK4_INTERVALLISTTOOLS.out.versions) - ch_versions = ch_versions.mix(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.versions) - ch_versions = ch_versions.mix(GATK4_GERMLINECNVCALLER.out.versions.first()) + versions = Channel.empty() + + // Prepare references + GATK4_INDEXFEATUREFILE_MAPPABILITY(ch_mappable_regions) + GATK4_INDEXFEATUREFILE_SEGDUP(ch_segmental_duplications) + + //Runs for wes analysis, when target_bed file is provided instead of target_interval_list + GATK4_BEDTOINTERVALLIST_TARGETS(ch_target_bed, ch_dict) + + //Runs for wes analysis, when exclude_bed file is provided instead of target_interval_list + GATK4_BEDTOINTERVALLIST_EXCLUDE(ch_exclude_bed, ch_dict) + + ch_user_target_interval_list + .combine(GATK4_BEDTOINTERVALLIST_TARGETS.out.interval_list.ifEmpty(null)) + .branch { it -> + intervallistfrompath: it[2].equals(null) + return [it[0], it[1]] + intervallistfrombed: !it[2].equals(null) + return [it[2], it[3]] + } + .set { ch_targets_for_mix } + + ch_targets_for_mix.intervallistfrompath + .mix(ch_targets_for_mix.intervallistfrombed) + .collect() + .set { ch_target_interval_list } + + ch_user_exclude_interval_list + .combine(GATK4_BEDTOINTERVALLIST_EXCLUDE.out.interval_list.ifEmpty(null)) + .branch { it -> + intervallistfrompath: it[2].equals(null) + return [it[0], it[1]] + intervallistfrombed: !it[2].equals(null) + return [it[2], it[3]] + } + .set { ch_exclude_for_mix } + + ch_exclude_for_mix.intervallistfrompath + .mix(ch_exclude_for_mix.intervallistfrombed) + .collect() + .set { ch_exclude_interval_list } + + GATK4_PREPROCESSINTERVALS( + ch_fasta, + ch_fai, + ch_dict, + ch_target_interval_list, + ch_exclude_interval_list, + ) + + GATK4_ANNOTATEINTERVALS( + GATK4_PREPROCESSINTERVALS.out.interval_list, + ch_fasta, + ch_fai, + ch_dict, + ch_mappable_regions, + GATK4_INDEXFEATUREFILE_MAPPABILITY.out.index.ifEmpty([[:], []]), + ch_segmental_duplications, + GATK4_INDEXFEATUREFILE_SEGDUP.out.index.ifEmpty([[:], []]), + ) + + // 
Filter out files that lack indices, and generate them + ch_input + .branch { meta, alignment, index -> + alignment_with_index: index.size() > 0 + return [meta, alignment, index] + alignment_without_index: index.size() == 0 + return [meta, alignment] + } + .set { ch_for_mix } + + SAMTOOLS_INDEX(ch_for_mix.alignment_without_index) + + SAMTOOLS_INDEX.out.bai + .mix(SAMTOOLS_INDEX.out.crai) + .set { ch_index } + + // Collect alignment files and their indices + ch_for_mix.alignment_without_index + .join(ch_index) + .mix(ch_for_mix.alignment_with_index) + .combine(GATK4_PREPROCESSINTERVALS.out.interval_list.map { it -> it[1] }) + .set { ch_readcounts_in } + + // Collect read counts, and generate models + GATK4_COLLECTREADCOUNTS( + ch_readcounts_in, + ch_fasta, + ch_fai, + ch_dict, + ) + + GATK4_COLLECTREADCOUNTS.out.tsv + .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) + .collect { it[1] } + .map { tsv -> [[id: val_pon_name], tsv] } + .set { ch_readcounts_out } + + + GATK4_FILTERINTERVALS( + GATK4_PREPROCESSINTERVALS.out.interval_list, + ch_readcounts_out, + GATK4_ANNOTATEINTERVALS.out.annotated_intervals, + ) + + GATK4_INTERVALLISTTOOLS(GATK4_FILTERINTERVALS.out.interval_list).interval_list.map { meta, it -> it }.flatten().set { ch_intervallist_out } + + ch_readcounts_out + .combine(GATK4_FILTERINTERVALS.out.interval_list) + .map { meta, counts, meta2, il -> [meta, counts, il, []] } + .set { ch_contigploidy_in } + + GATK4_DETERMINEGERMLINECONTIGPLOIDY( + ch_contigploidy_in, + [[:], []], + ch_ploidy_priors, + ) + + ch_readcounts_out + .combine(ch_intervallist_out) + .combine(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.calls) + .map { meta, counts, il, meta2, calls -> [meta + [id: il.baseName], counts, il, calls, []] } + .set { ch_cnvcaller_in } + + GATK4_GERMLINECNVCALLER(ch_cnvcaller_in) + + versions = versions.mix(SAMTOOLS_INDEX.out.versions) + versions = versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) + versions = versions.mix(GATK4_BEDTOINTERVALLIST_TARGETS.out.versions) + 
versions = versions.mix(GATK4_BEDTOINTERVALLIST_EXCLUDE.out.versions) + versions = versions.mix(GATK4_COLLECTREADCOUNTS.out.versions) + versions = versions.mix(GATK4_ANNOTATEINTERVALS.out.versions) + versions = versions.mix(GATK4_FILTERINTERVALS.out.versions) + versions = versions.mix(GATK4_INDEXFEATUREFILE_MAPPABILITY.out.versions) + versions = versions.mix(GATK4_INDEXFEATUREFILE_SEGDUP.out.versions) + versions = versions.mix(GATK4_INTERVALLISTTOOLS.out.versions) + versions = versions.mix(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.versions) + versions = versions.mix(GATK4_GERMLINECNVCALLER.out.versions) emit: - cnvmodel = GATK4_GERMLINECNVCALLER.out.cohortmodel - ploidymodel = GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.model - readcounts = ch_readcounts_out - versions = ch_versions + cnvmodel = GATK4_GERMLINECNVCALLER.out.cohortmodel + ploidymodel = GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.model + readcounts = ch_readcounts_out + versions } diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf new file mode 100644 index 0000000..734ecbb --- /dev/null +++ b/subworkflows/local/prepare_genome/main.nf @@ -0,0 +1,57 @@ +include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx' +include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary' +include { GATK4_PREPROCESSINTERVALS } from '../../../modules/nf-core/gatk4/preprocessintervals' + +workflow PREPARE_GENOME { + take: + fasta // channel: [mandatory] [ val(meta), path(fasta) ] + user_dict // channel: [optional] [ val(meta), path(dict) ] + user_fai // channel: [optional] [ val(meta), path(fai) ] + user_gens_interval_list // channel: [optional] [ val(meta), path(interval_list) ] + + main: + versions = Channel.empty() + + // Prepare references + fasta_for_fai = fasta + .mix(user_fai) + .groupTuple() + .map { meta, files -> + files[1] ? 
null : [meta, files[0]] + } + + fasta_for_dict = fasta + .mix(user_dict) + .groupTuple() + .map { meta, files -> + files[1] ? null : [meta, files[0]] + } + + SAMTOOLS_FAIDX(fasta_for_fai, [[:], []]) + GATK4_CREATESEQUENCEDICTIONARY(fasta_for_dict) + + dict = user_dict.mix(GATK4_CREATESEQUENCEDICTIONARY.out.dict).collect() + + fai = user_fai.mix(SAMTOOLS_FAIDX.out.fai).collect() + + fasta_for_interval_list = fasta + .mix(user_gens_interval_list) + .groupTuple() + .map { meta, files -> + files[1] ? null : [meta, files[0]] + } + + GATK4_PREPROCESSINTERVALS(fasta_for_interval_list, fai.collect(), dict.collect(), [[:], []], [[:], []]) + + interval_list = user_gens_interval_list.mix(GATK4_PREPROCESSINTERVALS.out.interval_list).collect() + + versions = versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) + versions = versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) + versions = versions.mix(SAMTOOLS_FAIDX.out.versions) + + emit: + dict + fai + interval_list + versions +} diff --git a/tests/.nftignore b/tests/.nftignore index a43c2ef..f1661cf 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -1,9 +1,7 @@ -*/references/genome.dict gatk4/*.{vcf.gz,vcf.gz.tbi} gatk4/test/* gatk4/test/** gens_pon/createreadcountpanelofnormals/gens_pon.hdf5 -gens_pon/intervals/genome.interval_list germlinecnvcaller/determinegermlinecontigploidy/cohort-model/mu_psi_j_log__.tsv germlinecnvcaller/determinegermlinecontigploidy/cohort-model/std_psi_j_log__.tsv germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-calls/*/baseline_copy_number_t.tsv @@ -37,3 +35,5 @@ multiqc/multiqc_data/multiqc_sources.txt multiqc/multiqc_plots/{svg,pdf,png}/*.{svg,pdf,png} multiqc/multiqc_report.html pipeline_info/*.{html,json,txt,yml} +references/genome.dict +references/intervals/gens_pon/genome.interval_list diff --git a/tests/gens_pon.config b/tests/gens_pon.config index e89e138..6f069fd 100644 --- a/tests/gens_pon.config +++ b/tests/gens_pon.config @@ -5,7 +5,6 @@ process { } 
- profiles { docker { docker.runOptions = '-u root' diff --git a/tests/gens_pon.nf.test b/tests/gens_pon.nf.test index d5d5205..417fa67 100644 --- a/tests/gens_pon.nf.test +++ b/tests/gens_pon.nf.test @@ -12,6 +12,7 @@ nextflow_pipeline { params { genome = 'GRCh38.chr22.testdata' gens_bin_length = 100 + gens_interval_list = null gens_pon_name = 'gens_pon' gens_readcount_format = "TSV" input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" diff --git a/tests/gens_pon.nf.test.snap b/tests/gens_pon.nf.test.snap index c597f8b..4a4effc 100644 --- a/tests/gens_pon.nf.test.snap +++ b/tests/gens_pon.nf.test.snap @@ -20,8 +20,6 @@ "gens_pon", "gens_pon/createreadcountpanelofnormals", "gens_pon/createreadcountpanelofnormals/gens_pon.hdf5", - "gens_pon/intervals", - "gens_pon/intervals/genome.interval_list", "gens_pon/readcounts", "gens_pon/readcounts/sample1.tsv", "gens_pon/readcounts/sample2.tsv", @@ -34,7 +32,11 @@ "multiqc/multiqc_data/multiqc_sources.txt", "multiqc/multiqc_report.html", "pipeline_info", - "pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml" + "pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml", + "references", + "references/intervals", + "references/intervals/gens_pon", + "references/intervals/gens_pon/genome.interval_list" ], [ "sample1.tsv:md5,bfceab35109b04317f38f9cc1cde71ca", @@ -46,6 +48,6 @@ "nf-test": "0.9.2", "nextflow": "24.10.5" }, - "timestamp": "2025-04-11T19:53:59.716710047" + "timestamp": "2025-04-14T18:36:47.335786578" } } \ No newline at end of file diff --git a/tests/mutect2.nf.test b/tests/mutect2.nf.test index cae0f59..d6f5cb0 100644 --- a/tests/mutect2.nf.test +++ b/tests/mutect2.nf.test @@ -11,6 +11,9 @@ nextflow_pipeline { mutect2_pon_name = 'test' outdir = "$outputDir" tools = 'mutect2' + mutect2_target_bed = null + fai = null + dict = null } } diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 3c70a15..86cf207 100644 --- a/workflows/createpanelrefs.nf +++ 
b/workflows/createpanelrefs.nf @@ -4,10 +4,10 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_createpanelrefs_pipeline' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_createpanelrefs_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -15,9 +15,10 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_crea ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { GENS_PON } from '../subworkflows/local/gens_pon' -include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvcaller_cohort' -include { BAM_CREATE_SOM_PON_GATK } from '../subworkflows/nf-core/bam_create_som_pon_gatk' +include { GENS_PON } from '../subworkflows/local/gens_pon' +include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvcaller_cohort' +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' +include { BAM_CREATE_SOM_PON_GATK } from '../subworkflows/nf-core/bam_create_som_pon_gatk' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -25,149 +26,179 @@ include { BAM_CREATE_SOM_PON_GATK } from '../subworkflows/nf-core/bam_create ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch' 
-include { SAMTOOLS_VIEW } from '../modules/nf-core/samtools/view' -include { MULTIQC } from '../modules/nf-core/multiqc' - -// Initialize file channels based on params, defined in the params.genomes[params.genome] scope -ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName], dict]}.collect() - : Channel.empty() -ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName], fai]}.collect() - : Channel.empty() -ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName], fasta]}.collect() - : Channel.empty() -// Initialize cnvkit specific parameters -ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName], targets]}.collect() - : Channel.value([[id:'null'], []]) -// Initialize germlinecnvcaller specific parameters -ch_gcnv_exclude_bed = params.gcnv_exclude_bed ? Channel.fromPath(params.gcnv_exclude_bed).map { exclude -> [[id:exclude.baseName], exclude]}.collect() - : Channel.value([[id:'null'], []]) -ch_gcnv_exclude_interval_list = params.gcnv_exclude_interval_list ? Channel.fromPath(params.gcnv_exclude_interval_list).map { exclude -> [[id:exclude.baseName], exclude]}.collect() - : Channel.value([[id:'null'], []]) -ch_gcnv_mappable_regions = params.gcnv_mappable_regions ? Channel.fromPath(params.gcnv_mappable_regions).collect() - : Channel.value([[id:'null'], []]) -ch_gcnv_ploidy_priors = params.gcnv_ploidy_priors ? Channel.fromPath(params.gcnv_ploidy_priors).collect() - : Channel.empty() -ch_gcnv_target_bed = params.gcnv_target_bed ? Channel.fromPath(params.gcnv_target_bed).map { targets -> [[id:targets.baseName], targets]}.collect() - : Channel.value([[id:'null'], []]) -ch_gcnv_target_interval_list = params.gcnv_target_interval_list ? 
Channel.fromPath(params.gcnv_target_interval_list).map { targets -> [[id:targets.baseName], targets]}.collect() - : Channel.value([[id:'null'], []]) -ch_gcnv_segmental_duplications = params.gcnv_segmental_duplications ? Channel.fromPath(params.gcnv_segmental_duplications).collect() - : Channel.value([[id:'null'], []]) -// Initialize mutect2 specific parameters -ch_mutect2_target_bed = params.mutect2_target_bed ? Channel.fromPath(params.mutect2_target_bed).collect() - : Channel.value([]) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ +include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch' +include { SAMTOOLS_VIEW } from '../modules/nf-core/samtools/view' +include { MULTIQC } from '../modules/nf-core/multiqc' workflow CREATEPANELREFS { - take: ch_samplesheet // channel: samplesheet read in from --input tools main: + ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() + // Initialize file channels based on params, defined in the params.genomes[params.genome] scope + ch_user_dict = params.dict + ? 
Channel.fromPath(params.dict).map { dict -> [[id: 'genome'], dict] }.collect() + : Channel.empty() + ch_user_fai = params.fai + ? Channel.fromPath(params.fai).map { fai -> [[id: 'genome'], fai] }.collect() + : Channel.empty() + ch_fasta = params.fasta + ? Channel.fromPath(params.fasta).map { fasta -> [[id: 'genome'], fasta] }.collect() + : Channel.empty() + // Initialize cnvkit specific parameters + ch_cnvkit_targets = params.cnvkit_targets + ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id: 'genome'], targets] }.collect() + : Channel.value([[id: 'genome'], []]) + // Initialize germlinecnvcaller specific parameters + ch_gcnv_exclude_bed = params.gcnv_exclude_bed + ? Channel.fromPath(params.gcnv_exclude_bed).map { exclude -> [[id: 'genome'], exclude] }.collect() + : Channel.value([[id: 'genome'], []]) + ch_gcnv_exclude_interval_list = params.gcnv_exclude_interval_list + ? Channel.fromPath(params.gcnv_exclude_interval_list).map { exclude -> [[id: 'genome'], exclude] }.collect() + : Channel.value([[id: 'genome'], []]) + ch_gcnv_mappable_regions = params.gcnv_mappable_regions + ? Channel.fromPath(params.gcnv_mappable_regions).collect() + : Channel.value([[id: 'genome'], []]) + ch_gcnv_ploidy_priors = params.gcnv_ploidy_priors + ? Channel.fromPath(params.gcnv_ploidy_priors).collect() + : Channel.empty() + ch_gcnv_target_bed = params.gcnv_target_bed + ? Channel.fromPath(params.gcnv_target_bed).map { targets -> [[id: 'genome'], targets] }.collect() + : Channel.value([[id: 'genome'], []]) + ch_gcnv_target_interval_list = params.gcnv_target_interval_list + ? Channel.fromPath(params.gcnv_target_interval_list).map { targets -> [[id: 'genome'], targets] }.collect() + : Channel.value([[id: 'genome'], []]) + ch_gcnv_segmental_duplications = params.gcnv_segmental_duplications + ? 
Channel.fromPath(params.gcnv_segmental_duplications).collect() + : Channel.value([[id: 'genome'], []]) + // Initialize mutect2 specific parameters + ch_mutect2_target_bed = params.mutect2_target_bed + ? Channel.fromPath(params.mutect2_target_bed).collect() + : Channel.value([[id: 'genome'], []]) + + // Initialize interval list specific parameters (GENS) + ch_user_interval_list = params.interval_list + ? Channel.fromPath(params.interval_list).map { interval_list -> [[id: 'genome'], interval_list] }.collect() + : Channel.empty() + + ch_multiqc_config = Channel.fromPath("${projectDir}/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) + + PREPARE_GENOME(ch_fasta, ch_user_dict, ch_user_fai, ch_user_interval_list) + ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) + + ch_dict = PREPARE_GENOME.out.dict + ch_fai = PREPARE_GENOME.out.fai + ch_interval_list = PREPARE_GENOME.out.interval_list + if (tools && tools.split(',').contains('cnvkit')) { ch_samplesheet - .branch { meta, bam, bai, cram, crai -> + .branch { meta, bam, _bai, cram, crai -> bamfiles: bam - return [meta, bam] + return [meta, bam] cramfiles: cram - return [meta, cram, crai] + return [meta, cram, crai] } .set { ch_input_by_fmt } - SAMTOOLS_VIEW (ch_input_by_fmt.cramfiles, ch_fasta, [], "").bam + SAMTOOLS_VIEW(ch_input_by_fmt.cramfiles, ch_fasta, [], "").bam .mix(ch_input_by_fmt.bamfiles) .map { meta, bam -> - return [meta + [id:'panel'], bam] + return [meta + [id: 'panel'], bam] } .groupTuple() .map { meta, bam -> return [meta, [], 
bam] } - .set {ch_cnvkit_input} + .set { ch_cnvkit_input } - CNVKIT_BATCH ( ch_cnvkit_input, ch_fasta, [[:],[]], ch_cnvkit_targets, [[:],[]], true ) + CNVKIT_BATCH(ch_cnvkit_input, ch_fasta, [[:], []], ch_cnvkit_targets, [[:], []], true) ch_versions = ch_versions.mix(CNVKIT_BATCH.out.versions) } if (tools && tools.split(',').contains('germlinecnvcaller')) { ch_samplesheet - .map{meta, bam, bai, cram, crai -> - if (bam) return [ meta + [data_type:'bam'], bam, bai ] - if (cram) return [ meta + [data_type:'cram'], cram, crai ] + .map { meta, bam, bai, cram, crai -> + if (bam) { + return [meta + [data_type: 'bam'], bam, bai] + } + if (cram) { + return [meta + [data_type: 'cram'], cram, crai] + } } .set { ch_germlinecnvcaller_input } - GERMLINECNVCALLER_COHORT ( ch_dict, - ch_fai, - ch_fasta, - ch_germlinecnvcaller_input, - ch_gcnv_ploidy_priors, - ch_gcnv_mappable_regions, - ch_gcnv_segmental_duplications, - ch_gcnv_target_bed, - ch_gcnv_target_interval_list, - ch_gcnv_exclude_bed, - ch_gcnv_exclude_interval_list, - params.gcnv_model_name ) + GERMLINECNVCALLER_COHORT( + ch_germlinecnvcaller_input, + params.gcnv_model_name, + ch_dict, + ch_fai, + ch_fasta, + ch_gcnv_exclude_bed, + ch_gcnv_exclude_interval_list, + ch_gcnv_mappable_regions, + ch_gcnv_ploidy_priors, + ch_gcnv_segmental_duplications, + ch_gcnv_target_bed, + ch_gcnv_target_interval_list, + ) ch_versions = ch_versions.mix(GERMLINECNVCALLER_COHORT.out.versions) } if (tools && tools.split(',').contains('mutect2')) { - ch_mutect2_input = ch_samplesheet.map{meta, bam, bai, cram, crai -> - if (bam) return [ meta + [data_type:'bam'], bam, bai, [] ] - if (cram) return [ meta + [data_type:'cram'], cram, crai, [] ] + ch_mutect2_input = ch_samplesheet.map { meta, bam, bai, cram, crai -> + if (bam) { + return [meta + [data_type: 'bam'], bam, bai, []] + } + if (cram) { + return [meta + [data_type: 'cram'], cram, crai, []] + } } - BAM_CREATE_SOM_PON_GATK(ch_mutect2_input, - ch_fasta, - ch_fai, - ch_dict, + 
BAM_CREATE_SOM_PON_GATK( + ch_mutect2_input, params.mutect2_pon_name, - ch_mutect2_target_bed) + ch_dict, + ch_fai, + ch_fasta, + ch_mutect2_target_bed, + ) ch_versions = ch_versions.mix(BAM_CREATE_SOM_PON_GATK.out.versions) - } if (tools && tools.split(',').contains('gens')) { ch_samplesheet - .map{meta, bam, bai, cram, crai -> - if (bam) return [ meta + [data_type:'bam'], bam, bai ] - if (cram) return [ meta + [data_type:'cram'], cram, crai ] + .map { meta, bam, bai, cram, crai -> + if (bam) { + return [meta + [data_type: 'bam'], bam, bai] + } + if (cram) { + return [meta + [data_type: 'cram'], cram, crai] + } } .set { ch_gens_input } - GENS_PON(ch_dict, - ch_fai, - ch_fasta, - ch_gens_input, - params.gens_pon_name ) + GENS_PON( + ch_gens_input, + params.gens_pon_name, + ch_dict, + ch_fai, + ch_fasta, + ch_interval_list, + ) ch_versions = ch_versions.mix(GENS_PON.out.versions) @@ -181,58 +212,58 @@ workflow CREATEPANELREFS { storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_ceatepanelrefs_software_mqc_versions.yml', sort: true, - newLine: true - ).set { ch_collated_versions } + newLine: true, + ) + .set { ch_collated_versions } // // MODULE: MultiQC // - ch_multiqc_config = Channel.fromPath( - "$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? - Channel.fromPath(params.multiqc_config, checkIfExists: true) : - Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? - Channel.fromPath(params.multiqc_logo, checkIfExists: true) : - Channel.empty() - - summary_params = paramsSummaryMap( - workflow, parameters_schema: "nextflow_schema.json") + ch_multiqc_config = Channel.fromPath( + "${projectDir}/assets/multiqc_config.yml", + checkIfExists: true + ) + ch_multiqc_custom_config = params.multiqc_config + ? Channel.fromPath(params.multiqc_config, checkIfExists: true) + : Channel.empty() + ch_multiqc_logo = params.multiqc_logo + ? 
Channel.fromPath(params.multiqc_logo, checkIfExists: true) + : Channel.empty() + + summary_params = paramsSummaryMap( + workflow, + parameters_schema: "nextflow_schema.json" + ) ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) ch_multiqc_files = ch_multiqc_files.mix( - ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? - file(params.multiqc_methods_description, checkIfExists: true) : - file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value( - methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') + ) + ch_multiqc_custom_methods_description = params.multiqc_methods_description + ? file(params.multiqc_methods_description, checkIfExists: true) + : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description) + ) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) ch_multiqc_files = ch_multiqc_files.mix( ch_methods_description.collectFile( name: 'methods_description_mqc.yaml', - sort: true + sort: true, ) ) - MULTIQC ( + MULTIQC( ch_multiqc_files.collect(), ch_multiqc_config.toList(), ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList(), [], - [] + [], ) emit: multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] - + versions = ch_versions // channel: [ path(versions.yml) ] } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ From 11db4da5793638e059e257c3c005fdf67985b9e9 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 15 Apr 
2025 10:20:41 +0200 Subject: [PATCH 179/234] no need to generate intervals --- main.nf | 3 +-- subworkflows/local/prepare_genome/main.nf | 11 +++++++++-- workflows/createpanelrefs.nf | 10 +++++----- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/main.nf b/main.nf index 40e3386..1c4abd3 100644 --- a/main.nf +++ b/main.nf @@ -43,7 +43,6 @@ include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_crea */ workflow { - main: // SUBWORKFLOW: Run initialisation tasks PIPELINE_INITIALISATION( @@ -58,7 +57,7 @@ workflow { // WORKFLOW: Run main workflow NFCORE_CREATEPANELREFS( PIPELINE_INITIALISATION.out.samplesheet, - params.tools, + params.tools ?: "no_tools", ) // SUBWORKFLOW: Run completion tasks diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 734ecbb..7e37828 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -8,9 +8,13 @@ workflow PREPARE_GENOME { user_dict // channel: [optional] [ val(meta), path(dict) ] user_fai // channel: [optional] [ val(meta), path(fai) ] user_gens_interval_list // channel: [optional] [ val(meta), path(interval_list) ] + tools // array: [mandatory] [ tools ] main: versions = Channel.empty() + dict = Channel.empty() + fai = Channel.empty() + interval_list = Channel.empty() // Prepare references fasta_for_fai = fasta @@ -41,9 +45,12 @@ workflow PREPARE_GENOME { files[1] ? 
null : [meta, files[0]] } - GATK4_PREPROCESSINTERVALS(fasta_for_interval_list, fai.collect(), dict.collect(), [[:], []], [[:], []]) + if (tools.contains('gens')) { - interval_list = user_gens_interval_list.mix(GATK4_PREPROCESSINTERVALS.out.interval_list).collect() + GATK4_PREPROCESSINTERVALS(fasta_for_interval_list, fai.collect(), dict.collect(), [[:], []], [[:], []]) + + interval_list = user_gens_interval_list.mix(GATK4_PREPROCESSINTERVALS.out.interval_list).collect() + } versions = versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) versions = versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 86cf207..4cd9386 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -91,14 +91,14 @@ workflow CREATEPANELREFS { ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) - PREPARE_GENOME(ch_fasta, ch_user_dict, ch_user_fai, ch_user_interval_list) + PREPARE_GENOME(ch_fasta, ch_user_dict, ch_user_fai, ch_user_interval_list, tools) ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) ch_dict = PREPARE_GENOME.out.dict ch_fai = PREPARE_GENOME.out.fai ch_interval_list = PREPARE_GENOME.out.interval_list - if (tools && tools.split(',').contains('cnvkit')) { + if (tools.split(',').contains('cnvkit')) { ch_samplesheet .branch { meta, bam, _bai, cram, crai -> @@ -124,7 +124,7 @@ workflow CREATEPANELREFS { ch_versions = ch_versions.mix(CNVKIT_BATCH.out.versions) } - if (tools && tools.split(',').contains('germlinecnvcaller')) { + if (tools.split(',').contains('germlinecnvcaller')) { ch_samplesheet .map { meta, bam, bai, cram, crai -> @@ -155,7 +155,7 @@ workflow CREATEPANELREFS { ch_versions = ch_versions.mix(GERMLINECNVCALLER_COHORT.out.versions) } - if (tools && tools.split(',').contains('mutect2')) { + if (tools.split(',').contains('mutect2')) { ch_mutect2_input = ch_samplesheet.map { meta, bam, bai, cram, crai -> if (bam) { @@ -178,7 +178,7 @@ workflow CREATEPANELREFS { ch_versions = ch_versions.mix(BAM_CREATE_SOM_PON_GATK.out.versions) } - if (tools && tools.split(',').contains('gens')) { + if (tools.split(',').contains('gens')) { ch_samplesheet .map { meta, bam, bai, cram, crai -> From d5c030babcd53fb4a3516cbfa9a6b4c971139ca9 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 16 Apr 2025 08:57:26 +0200 Subject: [PATCH 180/234] deal better with gens_intervals --- subworkflows/local/prepare_genome/main.nf | 21 +++++++++++++-------- workflows/createpanelrefs.nf | 6 +++--- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 7e37828..6c57126 100644 --- 
a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -2,6 +2,7 @@ include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtoo include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary' include { GATK4_PREPROCESSINTERVALS } from '../../../modules/nf-core/gatk4/preprocessintervals' +// Prepare references workflow PREPARE_GENOME { take: fasta // channel: [mandatory] [ val(meta), path(fasta) ] @@ -11,12 +12,13 @@ workflow PREPARE_GENOME { tools // array: [mandatory] [ tools ] main: - versions = Channel.empty() dict = Channel.empty() fai = Channel.empty() interval_list = Channel.empty() + versions = Channel.empty() - // Prepare references + // If more than one file, then it means that the user has provided a fai file + // So we can pass out a null channel and SAMTOOLS_FAIDX won't be run fasta_for_fai = fasta .mix(user_fai) .groupTuple() @@ -24,6 +26,8 @@ workflow PREPARE_GENOME { files[1] ? null : [meta, files[0]] } + // If more than one file, then it means that the user has provided a dict file + // So we can pass out a null channel and GATK4_CREATESEQUENCEDICTIONARY won't be run fasta_for_dict = fasta .mix(user_dict) .groupTuple() @@ -38,19 +42,20 @@ workflow PREPARE_GENOME { fai = user_fai.mix(SAMTOOLS_FAIDX.out.fai).collect() + + // If more than one file, then it means that the user has provided an interval list file + // So we can pass out a null channel and GATK4_PREPROCESSINTERVALS won't be run + fasta_for_interval_list = fasta .mix(user_gens_interval_list) .groupTuple() .map { meta, files -> - files[1] ? null : [meta, files[0]] + files[1] || !tools.split(',').contains('gens') ? 
null : [meta, files[0]] } - if (tools.contains('gens')) { - - GATK4_PREPROCESSINTERVALS(fasta_for_interval_list, fai.collect(), dict.collect(), [[:], []], [[:], []]) + GATK4_PREPROCESSINTERVALS(fasta_for_interval_list, fai.collect(), dict.collect(), [[:], []], [[:], []]) - interval_list = user_gens_interval_list.mix(GATK4_PREPROCESSINTERVALS.out.interval_list).collect() - } + interval_list = user_gens_interval_list.mix(GATK4_PREPROCESSINTERVALS.out.interval_list).collect() versions = versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) versions = versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 4cd9386..0ba1320 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -82,8 +82,8 @@ workflow CREATEPANELREFS { : Channel.value([[id: 'genome'], []]) // Initialize interval list specific parameters (GENS) - ch_user_interval_list = params.interval_list - ? Channel.fromPath(params.interval_list).map { interval_list -> [[id: 'genome'], interval_list] }.collect() + ch_user_gens_interval_list = params.gens_interval_list + ? Channel.fromPath(params.gens_interval_list).map { gens_interval_list -> [[id: 'genome'], gens_interval_list] }.collect() : Channel.empty() ch_multiqc_config = Channel.fromPath("${projectDir}/assets/multiqc_config.yml", checkIfExists: true) @@ -91,7 +91,7 @@ workflow CREATEPANELREFS { ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) - PREPARE_GENOME(ch_fasta, ch_user_dict, ch_user_fai, ch_user_interval_list, tools) + PREPARE_GENOME(ch_fasta, ch_user_dict, ch_user_fai, ch_user_gens_interval_list, tools) ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) ch_dict = PREPARE_GENOME.out.dict From c0440a2c5773ab9859f76fb6a04708c9a03b862c Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 16 Apr 2025 09:30:49 +0200 Subject: [PATCH 181/234] fix GERMLINECNVCALLER_COHORT --- conf/modules/prepare_genome.config | 2 +- .../local/germlinecnvcaller_cohort/main.nf | 4 ++-- subworkflows/local/prepare_genome/main.nf | 14 +++++++------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/conf/modules/prepare_genome.config b/conf/modules/prepare_genome.config index 7f3b851..091f595 100644 --- a/conf/modules/prepare_genome.config +++ b/conf/modules/prepare_genome.config @@ -20,7 +20,7 @@ process { ] } - withName: 'GATK4_PREPROCESSINTERVALS' { + withName: 'GATK4_PREPROCESSINTERVALS_GENS' { ext.args = { ["--imr OVERLAPPING_ONLY", "--bin-length ${params.gens_bin_length}"].join(" ") } diff --git a/subworkflows/local/germlinecnvcaller_cohort/main.nf b/subworkflows/local/germlinecnvcaller_cohort/main.nf index 129f954..4f2f25a 100644 --- a/subworkflows/local/germlinecnvcaller_cohort/main.nf +++ b/subworkflows/local/germlinecnvcaller_cohort/main.nf @@ -16,14 +16,14 @@ workflow GERMLINECNVCALLER_COHORT { ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] val_pon_name // string: [optional] name for panel of normals ch_dict // channel: [optional] [ val(meta), path(dict) ] - ch_exclude_bed // channel: [optional] [ val(meta), path(bed) ] ch_fai // channel: [optional] [ val(meta), path(fai) ] ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_exclude_bed // channel: [optional] [ val(meta), path(bed) ] + 
ch_user_exclude_interval_list // channel: [optional] [ val(meta), path(intervals) ] ch_mappable_regions // channel: [optional] [ val(meta), path(bed) ] ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] ch_segmental_duplications // channel: [optional] [ val(meta), path(bed) ] ch_target_bed // channel: [optional] [ val(meta), path(bed) ] - ch_user_exclude_interval_list // channel: [optional] [ val(meta), path(intervals) ] ch_user_target_interval_list // channel: [optional] [ val(meta), path(intervals) ] main: diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 6c57126..5d5735f 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -1,6 +1,6 @@ -include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx' -include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary' -include { GATK4_PREPROCESSINTERVALS } from '../../../modules/nf-core/gatk4/preprocessintervals' +include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx' +include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary' +include { GATK4_PREPROCESSINTERVALS as GATK4_PREPROCESSINTERVALS_GENS } from '../../../modules/nf-core/gatk4/preprocessintervals' // Prepare references workflow PREPARE_GENOME { @@ -44,7 +44,7 @@ workflow PREPARE_GENOME { // If more than one file, then it means that the user has provided an interval list file - // So we can pass out a null channel and GATK4_PREPROCESSINTERVALS won't be run + // So we can pass out a null channel and GATK4_PREPROCESSINTERVALS_GENS won't be run fasta_for_interval_list = fasta .mix(user_gens_interval_list) @@ -53,12 +53,12 @@ workflow PREPARE_GENOME { files[1] || !tools.split(',').contains('gens') ? 
null : [meta, files[0]] } - GATK4_PREPROCESSINTERVALS(fasta_for_interval_list, fai.collect(), dict.collect(), [[:], []], [[:], []]) + GATK4_PREPROCESSINTERVALS_GENS(fasta_for_interval_list, fai.collect(), dict.collect(), [[:], []], [[:], []]) - interval_list = user_gens_interval_list.mix(GATK4_PREPROCESSINTERVALS.out.interval_list).collect() + interval_list = user_gens_interval_list.mix(GATK4_PREPROCESSINTERVALS_GENS.out.interval_list).collect() versions = versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) - versions = versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) + versions = versions.mix(GATK4_PREPROCESSINTERVALS_GENS.out.versions) versions = versions.mix(SAMTOOLS_FAIDX.out.versions) emit: From 7163991086b11a0ddc46fb2a8ae465ab5f4a8026 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 16 Apr 2025 09:40:41 +0200 Subject: [PATCH 182/234] fix snapshot --- tests/gens_pon.nf.test.snap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gens_pon.nf.test.snap b/tests/gens_pon.nf.test.snap index 4a4effc..2803a69 100644 --- a/tests/gens_pon.nf.test.snap +++ b/tests/gens_pon.nf.test.snap @@ -9,7 +9,7 @@ "GATK4_CREATEREADCOUNTPANELOFNORMALS": { "gatk4": "4.5.0.0" }, - "GATK4_PREPROCESSINTERVALS": { + "GATK4_PREPROCESSINTERVALS_GENS": { "gatk4": "4.5.0.0" }, "Workflow": { From 656c9306560a296425d99c1abf9c955efaad9d36 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 16 Apr 2025 11:23:23 +0200 Subject: [PATCH 183/234] add creation of mutect2 target bed --- conf/modules/prepare_genome.config | 9 + modules.json | 5 + modules/nf-core/gawk/environment.yml | 7 + modules/nf-core/gawk/main.nf | 70 +++++++ modules/nf-core/gawk/meta.yml | 63 ++++++ modules/nf-core/gawk/tests/main.nf.test | 198 ++++++++++++++++++ modules/nf-core/gawk/tests/main.nf.test.snap | 200 +++++++++++++++++++ modules/nf-core/gawk/tests/nextflow.config | 6 + subworkflows/local/prepare_genome/main.nf | 47 +++-- tests/mutect2.nf.test | 14 +- tests/mutect2.nf.test.snap | 19 +- 
workflows/createpanelrefs.nf | 20 +- 12 files changed, 619 insertions(+), 39 deletions(-) create mode 100644 modules/nf-core/gawk/environment.yml create mode 100644 modules/nf-core/gawk/main.nf create mode 100644 modules/nf-core/gawk/meta.yml create mode 100644 modules/nf-core/gawk/tests/main.nf.test create mode 100644 modules/nf-core/gawk/tests/main.nf.test.snap create mode 100644 modules/nf-core/gawk/tests/nextflow.config diff --git a/conf/modules/prepare_genome.config b/conf/modules/prepare_genome.config index 091f595..161ad9f 100644 --- a/conf/modules/prepare_genome.config +++ b/conf/modules/prepare_genome.config @@ -11,6 +11,15 @@ */ process { + withName: 'BUILD_INTERVALS' { + ext.args = { "-v FS='\t' -v OFS='\t' '{ print \$1, \"0\", \$2 }'" } + ext.suffix = { "bed" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/intervals/mutect2_target_bed" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } withName: 'GATK4_CREATESEQUENCEDICTIONARY' { publishDir = [ diff --git a/modules.json b/modules.json index 95a8743..ff521c2 100644 --- a/modules.json +++ b/modules.json @@ -80,6 +80,11 @@ "git_sha": "08108058ea36a63f141c25c4e75f9f872a5b2296", "installed_by": ["modules"] }, + "gawk": { + "branch": "master", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "installed_by": ["modules"] + }, "multiqc": { "branch": "master", "git_sha": "f0719ae309075ae4a291533883847c3f7c441dad", diff --git a/modules/nf-core/gawk/environment.yml b/modules/nf-core/gawk/environment.yml new file mode 100644 index 0000000..f52109e --- /dev/null +++ b/modules/nf-core/gawk/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::gawk=5.3.0 diff --git a/modules/nf-core/gawk/main.nf b/modules/nf-core/gawk/main.nf new file mode 100644 index 0000000..615b2ce 
--- /dev/null +++ b/modules/nf-core/gawk/main.nf @@ -0,0 +1,70 @@ +process GAWK { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gawk:5.3.0' : + 'biocontainers/gawk:5.3.0' }" + + input: + tuple val(meta), path(input, arity: '0..*') + path(program_file) + val(disable_redirect_output) + + output: + tuple val(meta), path("*.${suffix}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' // args is used for the main arguments of the tool + def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.collect{ it.getExtension()}.get(0)}" // use the first extension of the input files + + program = program_file ? "-f ${program_file}" : "${args2}" + lst_gz = input.findResults{ it.getExtension().endsWith("gz") ? it.toString() : null } + unzip = lst_gz ? "gunzip -q -f ${lst_gz.join(" ")}" : "" + input_cmd = input.collect { it.toString() - ~/\.gz$/ }.join(" ") + output_cmd = suffix.endsWith("gz") ? "| gzip > ${prefix}.${suffix}" : "> ${prefix}.${suffix}" + output = disable_redirect_output ? "" : output_cmd + cleanup = lst_gz ? "rm ${lst_gz.collect{ it - ~/\.gz$/ }.join(" ")}" : "" + + input.collect{ + assert it.name != "${prefix}.${suffix}" : "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ } + + """ + ${unzip} + + awk \\ + ${args} \\ + ${program} \\ + ${input_cmd} \\ + ${output} + + ${cleanup} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.getExtension()}" + def create_cmd = suffix.endsWith("gz") ? "echo '' | gzip >" : "touch" + + """ + ${create_cmd} ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gawk/meta.yml b/modules/nf-core/gawk/meta.yml new file mode 100644 index 0000000..34c50b1 --- /dev/null +++ b/modules/nf-core/gawk/meta.yml @@ -0,0 +1,63 @@ +name: "gawk" +description: | + If you are like many computer users, you would frequently like to make changes in various text files + wherever certain patterns appear, or extract data from parts of certain lines while discarding the rest. + The job is easy with awk, especially the GNU implementation gawk. +keywords: + - gawk + - awk + - txt + - text + - file parsing +tools: + - "gawk": + description: "GNU awk" + homepage: "https://www.gnu.org/software/gawk/" + documentation: "https://www.gnu.org/software/gawk/manual/" + tool_dev_url: "https://www.gnu.org/prep/ftp.html" + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: The input file - Specify the logic that needs to be executed on + this file on the `ext.args2` or in the program file. + If the files have a `.gz` extension, they will be unzipped using `zcat`. + pattern: "*" + - - program_file: + type: file + description: Optional file containing logic for awk to execute. If you don't + wish to use a file, you can use `ext.args2` to specify the logic. 
+ pattern: "*" + - - disable_redirect_output: + type: boolean + description: Disable the redirection of awk output to a given file. This is + useful if you want to use awk's built-in redirect to write files instead + of the shell's redirect. +output: + - output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.${suffix}": + type: file + description: The output file - if using shell redirection, specify the name of this + file using `ext.prefix` and the extension using `ext.suffix`. Otherwise, ensure + the awk program produces files with the extension in `ext.suffix`. + pattern: "*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/gawk/tests/main.nf.test b/modules/nf-core/gawk/tests/main.nf.test new file mode 100644 index 0000000..5446227 --- /dev/null +++ b/modules/nf-core/gawk/tests/main.nf.test @@ -0,0 +1,198 @@ +nextflow_process { + + name "Test Process GAWK" + script "../main.nf" + process "GAWK" + + tag "modules" + tag "modules_nfcore" + tag "gawk" + + config "./nextflow.config" + + test("Convert fasta to bed") { + when { + params { + gawk_suffix = "bed" + gawk_args2 = '\'BEGIN { FS = OFS = "\t"}; { print \$1, "0", \$2 }\'' + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = [] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Convert fasta to bed with program file") { + when { + params { + gawk_suffix = "bed" + gawk_args2 = "" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', 
checkIfExists: true) + ] + input[1] = Channel.of('BEGIN { FS = OFS = "\t"}; { print \$1, "0", \$2 }').collectFile(name:"program.awk") + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Convert fasta to bed using awk redirect instead of shell redirect") { + when { + params { + gawk_suffix = "bed" + gawk_args2 = '\'BEGIN { FS = OFS = "\t"}; { print \$1, "0", \$2 > "test.bed" }\'' + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = [] + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Extract first column from multiple files") { + when { + params { + gawk_suffix = "bed" + gawk_args2 = "" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [file(params.modules_testdata_base_path + 'generic/txt/hello.txt', checkIfExists: true), + file(params.modules_testdata_base_path + 'generic/txt/species_names.txt', checkIfExists: true)] + ] + input[1] = Channel.of('BEGIN {FS=" "}; {print \$1}').collectFile(name:"program.awk") + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Unzip files before processing") { + when { + params { + gawk_suffix = "bed" + gawk_args2 = "" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_chrM.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz', checkIfExists: true)] + ] + input[1] = Channel.of('/^#CHROM/ { print \$1, \$10 }').collectFile(name:"column_header.awk") + input[2] = false + """ + } + } + + then { + assertAll( + { 
assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Compress after processing") { + when { + params { + gawk_suffix = "txt.gz" + gawk_args2 = '\'BEGIN { FS = OFS = "\t"}; { print \$1, "0", \$2 }\'' + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = [] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Input and output files are similar") { + when { + params { + gawk_suffix = "txt" + gawk_args = "" + gawk_args2 = "" + } + process { + """ + input[0] = [ + [ id:'hello' ], // meta map + [file(params.modules_testdata_base_path + 'generic/txt/hello.txt', checkIfExists: true), + file(params.modules_testdata_base_path + 'generic/txt/species_names.txt', checkIfExists: true)] + ] + input[1] = Channel.of('BEGIN {FS=" "}; {print \$1}').collectFile(name:"program.awk") + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.failed }, + { assert process.errorReport.contains("Input and output names are the same, set prefix in module configuration to disambiguate!") } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/gawk/tests/main.nf.test.snap b/modules/nf-core/gawk/tests/main.nf.test.snap new file mode 100644 index 0000000..d8e8ac7 --- /dev/null +++ b/modules/nf-core/gawk/tests/main.nf.test.snap @@ -0,0 +1,200 @@ +{ + "Compress after processing": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.txt.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ], + "output": [ + [ + { + "id": "test" + }, + "test.txt.gz:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + 
"nextflow": "24.10.1" + }, + "timestamp": "2024-11-27T17:11:20.054143406" + }, + "Convert fasta to bed": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ], + "output": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T13:14:02.347809811" + }, + "Convert fasta to bed with program file": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ], + "output": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T13:14:11.894616209" + }, + "Extract first column from multiple files": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,566c51674bd643227bb2d83e0963376d" + ] + ], + "1": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ], + "output": [ + [ + { + "id": "test" + }, + "test.bed:md5,566c51674bd643227bb2d83e0963376d" + ] + ], + "versions": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T22:04:47.729300129" + }, + "Unzip files before processing": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,1e31ebd4a060aab5433bbbd9ab24e403" + ] + ], + "1": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ], + "output": [ + [ + { + "id": "test" + }, + "test.bed:md5,1e31ebd4a060aab5433bbbd9ab24e403" + ] + ], + "versions": [ + 
"versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-19T22:08:19.533527657" + }, + "Convert fasta to bed using awk redirect instead of shell redirect": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ], + "output": [ + [ + { + "id": "test" + }, + "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-03-05T08:31:09.88842854" + } +} \ No newline at end of file diff --git a/modules/nf-core/gawk/tests/nextflow.config b/modules/nf-core/gawk/tests/nextflow.config new file mode 100644 index 0000000..895709a --- /dev/null +++ b/modules/nf-core/gawk/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: GAWK { + ext.suffix = params.gawk_suffix + ext.args2 = params.gawk_args2 + } +} diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 5d5735f..1829907 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -1,6 +1,7 @@ -include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx' include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary' include { GATK4_PREPROCESSINTERVALS as GATK4_PREPROCESSINTERVALS_GENS } from '../../../modules/nf-core/gatk4/preprocessintervals' +include { GAWK as BUILD_INTERVALS } from '../../../modules/nf-core/gawk' +include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx' // Prepare references workflow PREPARE_GENOME { @@ -8,7 +9,8 @@ workflow PREPARE_GENOME { fasta // channel: [mandatory] [ val(meta), path(fasta) ] user_dict // channel: [optional] [ val(meta), path(dict) ] 
user_fai // channel: [optional] [ val(meta), path(fai) ] - user_gens_interval_list // channel: [optional] [ val(meta), path(interval_list) ] + user_gens_interval_list // channel: [optional] [ val(meta), path(gens_interval_list) ] + user_mutect2_target_bed // channel: [optional] [ val(meta), path(mutect2_target_bed) ] tools // array: [mandatory] [ tools ] main: @@ -17,15 +19,6 @@ workflow PREPARE_GENOME { interval_list = Channel.empty() versions = Channel.empty() - // If more than one file, then it means that the user has provided a fai file - // So we can pass out a null channel and SAMTOOLS_FAIDX won't be run - fasta_for_fai = fasta - .mix(user_fai) - .groupTuple() - .map { meta, files -> - files[1] ? null : [meta, files[0]] - } - // If more than one file, then it means that the user has provided a dict file // So we can pass out a null channel and GATK4_CREATESEQUENCEDICTIONARY won't be run fasta_for_dict = fasta @@ -35,13 +28,22 @@ workflow PREPARE_GENOME { files[1] ? null : [meta, files[0]] } - SAMTOOLS_FAIDX(fasta_for_fai, [[:], []]) GATK4_CREATESEQUENCEDICTIONARY(fasta_for_dict) dict = user_dict.mix(GATK4_CREATESEQUENCEDICTIONARY.out.dict).collect() - fai = user_fai.mix(SAMTOOLS_FAIDX.out.fai).collect() + // If more than one file, then it means that the user has provided a fai file + // So we can pass out a null channel and SAMTOOLS_FAIDX won't be run + fasta_for_fai = fasta + .mix(user_fai) + .groupTuple() + .map { meta, files -> + files[1] ? 
null : [meta, files[0]] + } + SAMTOOLS_FAIDX(fasta_for_fai, [[:], []]) + + fai = user_fai.mix(SAMTOOLS_FAIDX.out.fai).collect() // If more than one file, then it means that the user has provided an interval list file // So we can pass out a null channel and GATK4_PREPROCESSINTERVALS_GENS won't be run @@ -55,8 +57,22 @@ workflow PREPARE_GENOME { GATK4_PREPROCESSINTERVALS_GENS(fasta_for_interval_list, fai.collect(), dict.collect(), [[:], []], [[:], []]) - interval_list = user_gens_interval_list.mix(GATK4_PREPROCESSINTERVALS_GENS.out.interval_list).collect() + gens_interval_list = user_gens_interval_list.mix(GATK4_PREPROCESSINTERVALS_GENS.out.interval_list).collect() + + // If more than one file, then it means that the user has provided a fai file + // So we can pass out a null channel and SAMTOOLS_FAIDX won't be run + fai_for_intervals = fai + .mix(user_mutect2_target_bed) + .groupTuple() + .map { meta, files -> + files[1] ? null : [meta, files[0]] + } + + BUILD_INTERVALS(fai_for_intervals, [], false) + + mutect2_target_bed = user_mutect2_target_bed.mix(BUILD_INTERVALS.out.output).collect() + versions = versions.mix(BUILD_INTERVALS.out.versions) versions = versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) versions = versions.mix(GATK4_PREPROCESSINTERVALS_GENS.out.versions) versions = versions.mix(SAMTOOLS_FAIDX.out.versions) @@ -64,6 +80,7 @@ workflow PREPARE_GENOME { emit: dict fai - interval_list + gens_interval_list + mutect2_target_bed versions } diff --git a/tests/mutect2.nf.test b/tests/mutect2.nf.test index d6f5cb0..5ca7b77 100644 --- a/tests/mutect2.nf.test +++ b/tests/mutect2.nf.test @@ -11,9 +11,6 @@ nextflow_pipeline { mutect2_pon_name = 'test' outdir = "$outputDir" tools = 'mutect2' - mutect2_target_bed = null - fai = null - dict = null } } @@ -38,14 +35,15 @@ nextflow_pipeline { } } - test("-profile test --tools mutect2 --mutect2_pon_name test --input tests/csv/1.0.0/cram.csv") { + test("-profile test --tools mutect2 --mutect2_target_bed null 
--mutect2_pon_name test --input tests/csv/1.0.0/cram.csv") { when { params { - input = "${projectDir}/tests/csv/1.0.0/cram.csv" - mutect2_pon_name = 'test' - outdir = "$outputDir" - tools = 'mutect2' + input = "${projectDir}/tests/csv/1.0.0/cram.csv" + mutect2_pon_name = 'test' + mutect2_target_bed = null + outdir = "$outputDir" + tools = 'mutect2' } } diff --git a/tests/mutect2.nf.test.snap b/tests/mutect2.nf.test.snap index 0a3886b..89add30 100644 --- a/tests/mutect2.nf.test.snap +++ b/tests/mutect2.nf.test.snap @@ -1,8 +1,11 @@ { - "-profile test --tools mutect2 --mutect2_pon_name test --input tests/csv/1.0.0/cram.csv": { + "-profile test --tools mutect2 --mutect2_target_bed null --mutect2_pon_name test --input tests/csv/1.0.0/cram.csv": { "content": [ - 5, + 6, { + "BUILD_INTERVALS": { + "gawk": "5.3.0" + }, "GATK4_CREATESOMATICPANELOFNORMALS": { "gatk4": "4.5.0.0" }, @@ -29,11 +32,12 @@ "gatk4/test.vcf.gz.tbi", "gatk4/test/__tiledb_workspace.tdb", "gatk4/test/callset.json", - "gatk4/test/chr21$2$23354000", - "gatk4/test/chr21$24132500$24910998", - "gatk4/test/chr21$25689498$46709983", + "gatk4/test/chr21$1$46709983", "gatk4/test/vcfheader.vcf", "gatk4/test/vidmap.json", + "intervals", + "intervals/mutect2_target_bed", + "intervals/mutect2_target_bed/genome.bed", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -48,6 +52,7 @@ [ "sample3.vcf.gz.stats:md5,a05ace4138fc5cb993ed912d654ec22d", "sample4.vcf.gz.stats:md5,080e6d0e254e582dfb9d5916c9637391", + "genome.bed:md5,472d213cfcde96565699779d5bfc0e32", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], @@ -55,7 +60,7 @@ "nf-test": "0.9.2", "nextflow": "24.10.5" }, - "timestamp": "2025-04-11T17:42:06.529424623" + "timestamp": "2025-04-16T11:05:19.08294075" }, "-profile test --tools mutect2 --mutect2_pon_name test": { "content": [ @@ -115,4 +120,4 @@ }, "timestamp": "2025-04-09T15:07:42.422460888" } -} +} \ No newline at end of file diff --git a/workflows/createpanelrefs.nf 
b/workflows/createpanelrefs.nf index 0ba1320..a52d457 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -76,10 +76,11 @@ workflow CREATEPANELREFS { ch_gcnv_segmental_duplications = params.gcnv_segmental_duplications ? Channel.fromPath(params.gcnv_segmental_duplications).collect() : Channel.value([[id: 'genome'], []]) + // Initialize mutect2 specific parameters - ch_mutect2_target_bed = params.mutect2_target_bed - ? Channel.fromPath(params.mutect2_target_bed).collect() - : Channel.value([[id: 'genome'], []]) + ch_user_mutect2_target_bed = params.mutect2_target_bed + ? Channel.fromPath(params.mutect2_target_bed).map { targets -> [[id: 'genome'], targets] }.collect() + : Channel.empty() // Initialize interval list specific parameters (GENS) ch_user_gens_interval_list = params.gens_interval_list @@ -91,12 +92,13 @@ workflow CREATEPANELREFS { ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) - PREPARE_GENOME(ch_fasta, ch_user_dict, ch_user_fai, ch_user_gens_interval_list, tools) + PREPARE_GENOME(ch_fasta, ch_user_dict, ch_user_fai, ch_user_gens_interval_list, ch_user_mutect2_target_bed, tools) ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) ch_dict = PREPARE_GENOME.out.dict ch_fai = PREPARE_GENOME.out.fai - ch_interval_list = PREPARE_GENOME.out.interval_list + ch_interval_list = PREPARE_GENOME.out.gens_interval_list + ch_mutect2_target_bed = PREPARE_GENOME.out.mutect2_target_bed if (tools.split(',').contains('cnvkit')) { @@ -168,11 +170,11 @@ workflow CREATEPANELREFS { BAM_CREATE_SOM_PON_GATK( ch_mutect2_input, - params.mutect2_pon_name, - ch_dict, - ch_fai, ch_fasta, - ch_mutect2_target_bed, + ch_fai, + ch_dict, + params.mutect2_pon_name, + ch_mutect2_target_bed.map { _meta, target -> [target] }, ) ch_versions = ch_versions.mix(BAM_CREATE_SOM_PON_GATK.out.versions) From f0ee03ea573b55f61a6eed38ea5fdfa87aa25cd2 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 16 Apr 2025 11:40:35 +0200 Subject: [PATCH 184/234] no need to generate bed when not running mutect2 --- subworkflows/local/prepare_genome/main.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 1829907..49272b7 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -16,7 +16,8 @@ workflow PREPARE_GENOME { main: dict = Channel.empty() fai = Channel.empty() - interval_list = Channel.empty() + gens_interval_list = Channel.empty() + mutect2_target_bed = Channel.empty() versions = Channel.empty() // If more than one file, then it means that the user has provided a dict file @@ -65,7 +66,7 @@ workflow PREPARE_GENOME { .mix(user_mutect2_target_bed) .groupTuple() .map { meta, files 
-> - files[1] ? null : [meta, files[0]] + files[1] || !tools.split(',').contains('mutect2') ? null : [meta, files[0]] } BUILD_INTERVALS(fai_for_intervals, [], false) From b1a7f50fc1479a18fbc554fbf956d423fbbf6c64 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 16 Apr 2025 16:50:02 +0200 Subject: [PATCH 185/234] heavy refactoring --- main.nf | 172 ++++++++++++++++++- workflows/createpanelrefs.nf | 271 ------------------------------ workflows/createpanelrefs/main.nf | 138 +++++++++++++++ 3 files changed, 305 insertions(+), 276 deletions(-) delete mode 100644 workflows/createpanelrefs.nf create mode 100644 workflows/createpanelrefs/main.nf diff --git a/main.nf b/main.nf index 1c4abd3..b423be2 100644 --- a/main.nf +++ b/main.nf @@ -25,6 +25,7 @@ params.gcnv_ploidy_priors = getGenomeAttribute('gcnv_ploidy_priors') params.gcnv_segmental_duplications = getGenomeAttribute('gcnv_segmental_duplications') params.gcnv_target_bed = getGenomeAttribute('gcnv_target_bed') params.gcnv_target_interval_list = getGenomeAttribute('gcnv_target_interval_list') +params.gens_interval_list = getGenomeAttribute('gens_interval_list') params.mutect2_target_bed = getGenomeAttribute('mutect2_target_bed') /* @@ -32,9 +33,16 @@ params.mutect2_target_bed = getGenomeAttribute('mutect2_target_bed') IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + include { CREATEPANELREFS } from './workflows/createpanelrefs' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline' +include { PREPARE_GENOME } from './subworkflows/local/prepare_genome' +include { MULTIQC } from './modules/nf-core/multiqc' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from './subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } 
from './subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from './subworkflows/local/utils_nfcore_createpanelrefs_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -43,6 +51,59 @@ include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_crea */ workflow { + versions = Channel.empty() + multiqc_files = Channel.empty() + + // Initialize file channels based on params, defined in the params.genomes[params.genome] scope + user_dict = params.dict + ? Channel.fromPath(params.dict).map { dict -> [[id: 'genome'], dict] }.collect() + : Channel.empty() + + user_fai = params.fai + ? Channel.fromPath(params.fai).map { fai -> [[id: 'genome'], fai] }.collect() + : Channel.empty() + + fasta = params.fasta + ? Channel.fromPath(params.fasta).map { fasta -> [[id: 'genome'], fasta] }.collect() + : Channel.empty() + + // Initialize cnvkit specific parameters + cnvkit_targets = params.cnvkit_targets + ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id: 'genome'], targets] }.collect() + : Channel.value([[id: 'genome'], []]) + + // Initialize gens interval list specific parameters + user_gens_interval_list = params.gens_interval_list + ? Channel.fromPath(params.gens_interval_list).map { gens_interval_list -> [[id: 'genome'], gens_interval_list] }.collect() + : Channel.empty() + + // Initialize germlinecnvcaller specific parameters + gcnv_exclude_bed = params.gcnv_exclude_bed + ? Channel.fromPath(params.gcnv_exclude_bed).map { exclude -> [[id: 'genome'], exclude] }.collect() + : Channel.value([[id: 'genome'], []]) + gcnv_exclude_interval_list = params.gcnv_exclude_interval_list + ? Channel.fromPath(params.gcnv_exclude_interval_list).map { exclude -> [[id: 'genome'], exclude] }.collect() + : Channel.value([[id: 'genome'], []]) + gcnv_mappable_regions = params.gcnv_mappable_regions + ? 
Channel.fromPath(params.gcnv_mappable_regions).collect() + : Channel.value([[id: 'genome'], []]) + gcnv_ploidy_priors = params.gcnv_ploidy_priors + ? Channel.fromPath(params.gcnv_ploidy_priors).collect() + : Channel.empty() + gcnv_target_bed = params.gcnv_target_bed + ? Channel.fromPath(params.gcnv_target_bed).map { targets -> [[id: 'genome'], targets] }.collect() + : Channel.value([[id: 'genome'], []]) + gcnv_target_interval_list = params.gcnv_target_interval_list + ? Channel.fromPath(params.gcnv_target_interval_list).map { targets -> [[id: 'genome'], targets] }.collect() + : Channel.value([[id: 'genome'], []]) + gcnv_segmental_duplications = params.gcnv_segmental_duplications + ? Channel.fromPath(params.gcnv_segmental_duplications).collect() + : Channel.value([[id: 'genome'], []]) + + // Initialize mutect2 specific parameters + user_mutect2_target_bed = params.mutect2_target_bed + ? Channel.fromPath(params.mutect2_target_bed).map { targets -> [[id: 'genome'], targets] }.collect() + : Channel.empty() // SUBWORKFLOW: Run initialisation tasks PIPELINE_INITIALISATION( @@ -54,10 +115,95 @@ workflow { params.input, ) + PREPARE_GENOME(fasta, user_dict, user_fai, user_gens_interval_list, user_mutect2_target_bed, params.tools ?: "no_tools") + + dict = PREPARE_GENOME.out.dict + fai = PREPARE_GENOME.out.fai + gens_interval_list = PREPARE_GENOME.out.gens_interval_list + mutect2_target_bed = PREPARE_GENOME.out.mutect2_target_bed + + + multiqc_config = Channel.fromPath("${projectDir}/assets/multiqc_config.yml", checkIfExists: true) + multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() + multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() + multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) + + versions = versions.mix(PREPARE_GENOME.out.versions) + // WORKFLOW: Run main workflow NFCORE_CREATEPANELREFS( PIPELINE_INITIALISATION.out.samplesheet, params.tools ?: "no_tools", + params.gcnv_model_name, + params.gens_pon_name, + params.mutect2_pon_name, + fasta, + dict, + fai, + cnvkit_targets, + gcnv_exclude_bed, + gcnv_exclude_interval_list, + gcnv_mappable_regions, + gcnv_ploidy_priors, + gcnv_segmental_duplications, + gcnv_target_bed, + gcnv_target_interval_list, + gens_interval_list, + mutect2_target_bed, + ) + + versions = versions.mix(NFCORE_CREATEPANELREFS.out.versions) + + // Collate and save software versions + collated_versions = softwareVersionsToYAML(versions).collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_ceatepanelrefs_software_mqc_versions.yml', + sort: true, + newLine: true, + ) + + // MODULE: MultiQC + multiqc_config = Channel.fromPath( + "${projectDir}/assets/multiqc_config.yml", + checkIfExists: true + ) + multiqc_custom_config = params.multiqc_config + ? Channel.fromPath(params.multiqc_config, checkIfExists: true) + : Channel.empty() + multiqc_logo = params.multiqc_logo + ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) + : Channel.empty() + + summary_params = paramsSummaryMap( + workflow, + parameters_schema: "nextflow_schema.json" + ) + workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + multiqc_files = multiqc_files.mix( + workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') + ) + multiqc_custom_methods_description = params.multiqc_methods_description + ? 
file(params.multiqc_methods_description, checkIfExists: true) + : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) + methods_description = Channel.value( + methodsDescriptionText(multiqc_custom_methods_description) + ) + + multiqc_files = multiqc_files.mix(collated_versions) + multiqc_files = multiqc_files.mix( + methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: true, + ) + ) + + MULTIQC( + multiqc_files.collect(), + multiqc_config.toList(), + multiqc_custom_config.toList(), + multiqc_logo.toList(), + [], + [], ) // SUBWORKFLOW: Run completion tasks @@ -68,7 +214,7 @@ workflow { params.outdir, params.monochrome_logs, params.hook_url, - NFCORE_CREATEPANELREFS.out.multiqc_report, + MULTIQC.out.report.toList(), ) } @@ -81,15 +227,31 @@ workflow { // WORKFLOW: Run main analysis pipeline depending on type of input workflow NFCORE_CREATEPANELREFS { take: - samplesheet // channel: samplesheet read in from --input - tools + samplesheet // channel: samplesheet read in from --input + tools // string: comma separated list of tools to run + gcnv_model_name // string: name of gcnv model + gens_pon_name // string: name of gens pon + mutect2_pon_name // string: name of mutect2 pon + fasta // channel: [meta, fasta] + dict // channel: [meta, dict] + fai // channel: [meta, fai] + cnvkit_targets // channel: [meta, cnvkit_targets] + gcnv_exclude_bed // channel: [meta, gcnv_exclude_bed] + gcnv_exclude_interval_list // channel: [meta, gcnv_exclude_interval_list] + gcnv_mappable_regions // channel: [meta, gcnv_mappable_regions] + gcnv_ploidy_priors // channel: [meta, gcnv_ploidy_priors] + gcnv_segmental_duplications // channel: [meta, gcnv_segmental_duplications] + gcnv_target_bed // channel: [meta, gcnv_target_bed] + gcnv_target_interval_list // channel: [meta, gcnv_target_interval_list] + gens_interval_list // channel: [meta, gens_interval_list] + mutect2_target_bed // channel: [meta, mutect2_target_bed] main: // 
WORKFLOW: Run pipeline - CREATEPANELREFS(samplesheet, tools) + CREATEPANELREFS(samplesheet, tools, gcnv_model_name, gens_pon_name, mutect2_pon_name, fasta, dict, fai, cnvkit_targets, gcnv_exclude_bed, gcnv_exclude_interval_list, gcnv_mappable_regions, gcnv_ploidy_priors, gcnv_segmental_duplications, gcnv_target_bed, gcnv_target_interval_list, gens_interval_list, mutect2_target_bed) emit: - multiqc_report = CREATEPANELREFS.out.multiqc_report // channel: /path/to/multiqc_report.html + versions = CREATEPANELREFS.out.versions // channel: versions.yml } /* diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf deleted file mode 100644 index a52d457..0000000 --- a/workflows/createpanelrefs.nf +++ /dev/null @@ -1,271 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT FUNCTIONS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_createpanelrefs_pipeline' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { GENS_PON } from '../subworkflows/local/gens_pon' -include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvcaller_cohort' -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' -include { BAM_CREATE_SOM_PON_GATK } from '../subworkflows/nf-core/bam_create_som_pon_gatk' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT MODULES 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch' -include { SAMTOOLS_VIEW } from '../modules/nf-core/samtools/view' -include { MULTIQC } from '../modules/nf-core/multiqc' - -workflow CREATEPANELREFS { - take: - ch_samplesheet // channel: samplesheet read in from --input - tools - - main: - - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() - - // Initialize file channels based on params, defined in the params.genomes[params.genome] scope - ch_user_dict = params.dict - ? Channel.fromPath(params.dict).map { dict -> [[id: 'genome'], dict] }.collect() - : Channel.empty() - ch_user_fai = params.fai - ? Channel.fromPath(params.fai).map { fai -> [[id: 'genome'], fai] }.collect() - : Channel.empty() - ch_fasta = params.fasta - ? Channel.fromPath(params.fasta).map { fasta -> [[id: 'genome'], fasta] }.collect() - : Channel.empty() - // Initialize cnvkit specific parameters - ch_cnvkit_targets = params.cnvkit_targets - ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id: 'genome'], targets] }.collect() - : Channel.value([[id: 'genome'], []]) - // Initialize germlinecnvcaller specific parameters - ch_gcnv_exclude_bed = params.gcnv_exclude_bed - ? Channel.fromPath(params.gcnv_exclude_bed).map { exclude -> [[id: 'genome'], exclude] }.collect() - : Channel.value([[id: 'genome'], []]) - ch_gcnv_exclude_interval_list = params.gcnv_exclude_interval_list - ? Channel.fromPath(params.gcnv_exclude_interval_list).map { exclude -> [[id: 'genome'], exclude] }.collect() - : Channel.value([[id: 'genome'], []]) - ch_gcnv_mappable_regions = params.gcnv_mappable_regions - ? Channel.fromPath(params.gcnv_mappable_regions).collect() - : Channel.value([[id: 'genome'], []]) - ch_gcnv_ploidy_priors = params.gcnv_ploidy_priors - ? Channel.fromPath(params.gcnv_ploidy_priors).collect() - : Channel.empty() - ch_gcnv_target_bed = params.gcnv_target_bed - ? 
Channel.fromPath(params.gcnv_target_bed).map { targets -> [[id: 'genome'], targets] }.collect() - : Channel.value([[id: 'genome'], []]) - ch_gcnv_target_interval_list = params.gcnv_target_interval_list - ? Channel.fromPath(params.gcnv_target_interval_list).map { targets -> [[id: 'genome'], targets] }.collect() - : Channel.value([[id: 'genome'], []]) - ch_gcnv_segmental_duplications = params.gcnv_segmental_duplications - ? Channel.fromPath(params.gcnv_segmental_duplications).collect() - : Channel.value([[id: 'genome'], []]) - - // Initialize mutect2 specific parameters - ch_user_mutect2_target_bed = params.mutect2_target_bed - ? Channel.fromPath(params.mutect2_target_bed).map { targets -> [[id: 'genome'], targets] }.collect() - : Channel.empty() - - // Initialize interval list specific parameters (GENS) - ch_user_gens_interval_list = params.gens_interval_list - ? Channel.fromPath(params.gens_interval_list).map { gens_interval_list -> [[id: 'genome'], gens_interval_list] }.collect() - : Channel.empty() - - ch_multiqc_config = Channel.fromPath("${projectDir}/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) - - PREPARE_GENOME(ch_fasta, ch_user_dict, ch_user_fai, ch_user_gens_interval_list, ch_user_mutect2_target_bed, tools) - ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) - - ch_dict = PREPARE_GENOME.out.dict - ch_fai = PREPARE_GENOME.out.fai - ch_interval_list = PREPARE_GENOME.out.gens_interval_list - ch_mutect2_target_bed = PREPARE_GENOME.out.mutect2_target_bed - - if (tools.split(',').contains('cnvkit')) { - - ch_samplesheet - .branch { meta, bam, _bai, cram, crai -> - bamfiles: bam - return [meta, bam] - cramfiles: cram - return [meta, cram, crai] - } - .set { ch_input_by_fmt } - - SAMTOOLS_VIEW(ch_input_by_fmt.cramfiles, ch_fasta, [], "").bam - .mix(ch_input_by_fmt.bamfiles) - .map { meta, bam -> - return [meta + [id: 'panel'], bam] - } - .groupTuple() - .map { meta, bam -> - return [meta, [], bam] - } - .set { ch_cnvkit_input } - - CNVKIT_BATCH(ch_cnvkit_input, ch_fasta, [[:], []], ch_cnvkit_targets, [[:], []], true) - ch_versions = ch_versions.mix(CNVKIT_BATCH.out.versions) - } - - if (tools.split(',').contains('germlinecnvcaller')) { - - ch_samplesheet - .map { meta, bam, bai, cram, crai -> - if (bam) { - return [meta + [data_type: 'bam'], bam, bai] - } - if (cram) { - return [meta + [data_type: 'cram'], cram, crai] - } - } - .set { ch_germlinecnvcaller_input } - - GERMLINECNVCALLER_COHORT( - ch_germlinecnvcaller_input, - params.gcnv_model_name, - ch_dict, - ch_fai, - ch_fasta, - ch_gcnv_exclude_bed, - ch_gcnv_exclude_interval_list, - ch_gcnv_mappable_regions, - ch_gcnv_ploidy_priors, - ch_gcnv_segmental_duplications, - ch_gcnv_target_bed, - ch_gcnv_target_interval_list, - ) - - ch_versions = ch_versions.mix(GERMLINECNVCALLER_COHORT.out.versions) - } - - if (tools.split(',').contains('mutect2')) { - - ch_mutect2_input = ch_samplesheet.map { meta, bam, bai, cram, crai -> - if (bam) { - return [meta + 
[data_type: 'bam'], bam, bai, []] - } - if (cram) { - return [meta + [data_type: 'cram'], cram, crai, []] - } - } - - BAM_CREATE_SOM_PON_GATK( - ch_mutect2_input, - ch_fasta, - ch_fai, - ch_dict, - params.mutect2_pon_name, - ch_mutect2_target_bed.map { _meta, target -> [target] }, - ) - - ch_versions = ch_versions.mix(BAM_CREATE_SOM_PON_GATK.out.versions) - } - - if (tools.split(',').contains('gens')) { - - ch_samplesheet - .map { meta, bam, bai, cram, crai -> - if (bam) { - return [meta + [data_type: 'bam'], bam, bai] - } - if (cram) { - return [meta + [data_type: 'cram'], cram, crai] - } - } - .set { ch_gens_input } - - GENS_PON( - ch_gens_input, - params.gens_pon_name, - ch_dict, - ch_fai, - ch_fasta, - ch_interval_list, - ) - - - ch_versions = ch_versions.mix(GENS_PON.out.versions) - } - - // - // Collate and save software versions - // - softwareVersionsToYAML(ch_versions) - .collectFile( - storeDir: "${params.outdir}/pipeline_info", - name: 'nf_core_ceatepanelrefs_software_mqc_versions.yml', - sort: true, - newLine: true, - ) - .set { ch_collated_versions } - - - // - // MODULE: MultiQC - // - ch_multiqc_config = Channel.fromPath( - "${projectDir}/assets/multiqc_config.yml", - checkIfExists: true - ) - ch_multiqc_custom_config = params.multiqc_config - ? Channel.fromPath(params.multiqc_config, checkIfExists: true) - : Channel.empty() - ch_multiqc_logo = params.multiqc_logo - ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) - : Channel.empty() - - summary_params = paramsSummaryMap( - workflow, - parameters_schema: "nextflow_schema.json" - ) - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_files = ch_multiqc_files.mix( - ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') - ) - ch_multiqc_custom_methods_description = params.multiqc_methods_description - ? 
file(params.multiqc_methods_description, checkIfExists: true) - : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value( - methodsDescriptionText(ch_multiqc_custom_methods_description) - ) - - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix( - ch_methods_description.collectFile( - name: 'methods_description_mqc.yaml', - sort: true, - ) - ) - - MULTIQC( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList(), - [], - [], - ) - - emit: - multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] -} diff --git a/workflows/createpanelrefs/main.nf b/workflows/createpanelrefs/main.nf new file mode 100644 index 0000000..b71021e --- /dev/null +++ b/workflows/createpanelrefs/main.nf @@ -0,0 +1,138 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { BAM_CREATE_SOM_PON_GATK } from '../../subworkflows/nf-core/bam_create_som_pon_gatk' +include { CNVKIT_BATCH } from '../../modules/nf-core/cnvkit/batch' +include { GENS_PON } from '../../subworkflows/local/gens_pon' +include { GERMLINECNVCALLER_COHORT } from '../../subworkflows/local/germlinecnvcaller_cohort' +include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view' + +workflow CREATEPANELREFS { + take: + samplesheet // channel: samplesheet read in from --input + tools // array: tools to run, or no_tools if none (it's actually comma separated values string, but close enough) + gcnv_model_name // string: name of gcnv model + gens_pon_name // string: name of gens pon + mutect2_pon_name // string: name of mutect2 pon + fasta // channel: [meta, fasta] + dict // 
channel: [meta, dict] + fai // channel: [meta, fai] + cnvkit_targets // channel: [meta, cnvkit_targets] + gcnv_exclude_bed // channel: [meta, gcnv_exclude_bed] + gcnv_exclude_interval_list // channel: [meta, gcnv_exclude_interval_list] + gcnv_mappable_regions // channel: [meta, gcnv_mappable_regions] + gcnv_ploidy_priors // channel: [meta, gcnv_ploidy_priors] + gcnv_segmental_duplications // channel: [meta, gcnv_segmental_duplications] + gcnv_target_bed // channel: [meta, gcnv_target_bed] + gcnv_target_interval_list // channel: [meta, gcnv_target_interval_list] + gens_interval_list // channel: [meta, gens_interval_list] + mutect2_target_bed // channel: [meta, mutect2_target_bed] + + main: + versions = Channel.empty() + + if (tools.split(',').contains('cnvkit')) { + + input_by_fmt = samplesheet.branch { meta, bam, _bai, cram, crai -> + bam: bam + return [meta, bam] + cram: cram + return [meta, cram, crai] + } + + cnvkit_input = SAMTOOLS_VIEW(input_by_fmt.cram, fasta, [], "").bam + .mix(input_by_fmt.bam) + .map { meta, bam -> + return [meta + [id: 'panel'], bam] + } + .groupTuple() + .map { meta, bam -> + return [meta, [], bam] + } + + CNVKIT_BATCH(cnvkit_input, fasta, [[:], []], cnvkit_targets, [[:], []], true) + + versions = versions.mix(CNVKIT_BATCH.out.versions) + } + + if (tools.split(',').contains('germlinecnvcaller')) { + + germlinecnvcaller_input = samplesheet.map { meta, bam, bai, cram, crai -> + if (bam) { + return [meta + [data_type: 'bam'], bam, bai] + } + if (cram) { + return [meta + [data_type: 'cram'], cram, crai] + } + } + + GERMLINECNVCALLER_COHORT( + germlinecnvcaller_input, + gcnv_model_name, + dict, + fai, + fasta, + gcnv_exclude_bed, + gcnv_exclude_interval_list, + gcnv_mappable_regions, + gcnv_ploidy_priors, + gcnv_segmental_duplications, + gcnv_target_bed, + gcnv_target_interval_list, + ) + + versions = versions.mix(GERMLINECNVCALLER_COHORT.out.versions) + } + + if (tools.split(',').contains('mutect2')) { + + mutect2_input = samplesheet.map { 
meta, bam, bai, cram, crai -> + if (bam) { + return [meta + [data_type: 'bam'], bam, bai, []] + } + if (cram) { + return [meta + [data_type: 'cram'], cram, crai, []] + } + } + + BAM_CREATE_SOM_PON_GATK( + mutect2_input, + fasta, + fai, + dict, + mutect2_pon_name, + mutect2_target_bed.map { _meta, target -> [target] }, + ) + + versions = versions.mix(BAM_CREATE_SOM_PON_GATK.out.versions) + } + + if (tools.split(',').contains('gens')) { + + gens_input = samplesheet.map { meta, bam, bai, cram, crai -> + if (bam) { + return [meta + [data_type: 'bam'], bam, bai] + } + if (cram) { + return [meta + [data_type: 'cram'], cram, crai] + } + } + + GENS_PON( + gens_input, + gens_pon_name, + dict, + fai, + fasta, + gens_interval_list, + ) + + versions = versions.mix(GENS_PON.out.versions) + } + + emit: + versions // channel: [ path(versions.yml) ] +} From 0e3992df6416e6bda54c799e7dfea0ddd9527af7 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 16 Apr 2025 17:12:08 +0200 Subject: [PATCH 186/234] add new param --- nextflow_schema.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index 2f91a4b..a25a290 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -216,6 +216,15 @@ "exists": true, "mimetype": "text/plain" }, + "gens_interval_list": { + "type": "string", + "description": "Path to GENS interval list file", + "pattern": "^\\S+\\.interval_list$", + "format": "file-path", + "fa_icon": "fas fa-file", + "exists": true, + "mimetype": "text/plain" + }, "gcnv_exclude_bed": { "type": "string", "exists": true, From 943c3d44c722a8f6d2a37c1253a23dee3c5ff86a Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 16 Apr 2025 18:34:57 +0200 Subject: [PATCH 187/234] update CHANGELOG --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3a8f2e6..09944f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ Initial release of nf-core/createpanelrefs, created with the 
[nf-core](https://n - [#8](https://github.com/nf-core/createpanelrefs/pull/8) - `Mutect2` can be used to create a PON - [#10](https://github.com/nf-core/createpanelrefs/pull/10) - `GATK germlinecnvcaller` can be used to create a PON - [#17](https://github.com/nf-core/createpanelrefs/pull/17) - `GENS` can be used to create a PON +- [#50](https://github.com/nf-core/createpanelrefs/pull/50) - Add auto creation of interval_list file from gens, and bed file for mutect2 ### `Updated` @@ -28,9 +29,13 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#48](https://github.com/nf-core/createpanelrefs/pull/48) - Improve CI (early failure + automatic nf-test shards + [RunsOn](https://runs-on.com/)) - [#49](https://github.com/nf-core/createpanelrefs/pull/49) - Improve CI (Test Mutect2 with CRAM + better usage of test references) - [#49](https://github.com/nf-core/createpanelrefs/pull/49) - Move all parameters in the schema that are references in the references section +- [#50](https://github.com/nf-core/createpanelrefs/pull/50) - Improve references related files handling +- [#50](https://github.com/nf-core/createpanelrefs/pull/50) - Heavy refactoring of the pipeline ### `Fixed` +- [#50](https://github.com/nf-core/createpanelrefs/pull/50) - Fix mutect2 that wasn't working without a bed file + ### `Dependencies` ### `Deprecated` From 63192261b45960d717ef4adaab6bda8a974f2f2d Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 30 Apr 2025 15:41:55 +0200 Subject: [PATCH 188/234] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3a8f2e6..de6c091 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#48](https://github.com/nf-core/createpanelrefs/pull/48) - Improve CI (early failure + automatic nf-test shards + [RunsOn](https://runs-on.com/)) - 
[#49](https://github.com/nf-core/createpanelrefs/pull/49) - Improve CI (Test Mutect2 with CRAM + better usage of test references) - [#49](https://github.com/nf-core/createpanelrefs/pull/49) - Move all parameters in the schema that are references in the references section +- [#52](https://github.com/nf-core/createpanelrefs/pull/52) - Template update for nf-core/tools v3.2.1 ### `Fixed` From c942cc4b3ded6486542204db6783be2f5052b1a3 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 27 May 2025 11:05:59 +0200 Subject: [PATCH 189/234] use tips from @nvnieuwk --- subworkflows/local/prepare_genome/main.nf | 55 +++++++++-------------- 1 file changed, 21 insertions(+), 34 deletions(-) diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 49272b7..95eecfc 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -20,54 +20,41 @@ workflow PREPARE_GENOME { mutect2_target_bed = Channel.empty() versions = Channel.empty() - // If more than one file, then it means that the user has provided a dict file - // So we can pass out a null channel and GATK4_CREATESEQUENCEDICTIONARY won't be run + // Only run GATK4_CREATESEQUENCEDICTIONARY and generate dict if no user_dict is provided fasta_for_dict = fasta - .mix(user_dict) - .groupTuple() - .map { meta, files -> - files[1] ? null : [meta, files[0]] - } + .join(user_dict, remainder: true) + .filter { _meta, _fasta, dict_ -> !dict_ } + .map { meta, fasta_, _dict -> [meta, fasta_] } GATK4_CREATESEQUENCEDICTIONARY(fasta_for_dict) dict = user_dict.mix(GATK4_CREATESEQUENCEDICTIONARY.out.dict).collect() - // If more than one file, then it means that the user has provided a fai file - // So we can pass out a null channel and SAMTOOLS_FAIDX won't be run + // Only run SAMTOOLS_FAIDX and generate fai if no user_fai is provided fasta_for_fai = fasta - .mix(user_fai) - .groupTuple() - .map { meta, files -> - files[1] ? 
null : [meta, files[0]] - } + .join(user_fai, remainder: true) + .filter { _meta, _fasta, fai_ -> !fai_ } + .map { meta, fasta_, _fai -> [meta, fasta_] } SAMTOOLS_FAIDX(fasta_for_fai, [[:], []]) fai = user_fai.mix(SAMTOOLS_FAIDX.out.fai).collect() - // If more than one file, then it means that the user has provided an interval list file - // So we can pass out a null channel and GATK4_PREPROCESSINTERVALS_GENS won't be run - + // Only run GATK4_PREPROCESSINTERVALS_GENS and generate gens_interval_list if no user_gens_interval_list is provided fasta_for_interval_list = fasta - .mix(user_gens_interval_list) - .groupTuple() - .map { meta, files -> - files[1] || !tools.split(',').contains('gens') ? null : [meta, files[0]] - } + .join(user_gens_interval_list, remainder: true) + .filter { _meta, _fasta, interval_list_ -> !interval_list_ } + .map { meta, fasta_, _interval_list -> [meta, fasta_] } GATK4_PREPROCESSINTERVALS_GENS(fasta_for_interval_list, fai.collect(), dict.collect(), [[:], []], [[:], []]) gens_interval_list = user_gens_interval_list.mix(GATK4_PREPROCESSINTERVALS_GENS.out.interval_list).collect() - // If more than one file, then it means that the user has provided a fai file - // So we can pass out a null channel and SAMTOOLS_FAIDX won't be run + // Only run BUILD_INTERVALS and generate mutect2_target_bed if no user_mutect2_target_bed is provided fai_for_intervals = fai - .mix(user_mutect2_target_bed) - .groupTuple() - .map { meta, files -> - files[1] || !tools.split(',').contains('mutect2') ? 
null : [meta, files[0]] - } + .join(user_mutect2_target_bed, remainder: true) + .filter { _meta, _fai, mutect2_target_bed_ -> !mutect2_target_bed_ } + .map { meta, fai_, _mutect2_target_bed -> [meta, fai_] } BUILD_INTERVALS(fai_for_intervals, [], false) @@ -79,9 +66,9 @@ workflow PREPARE_GENOME { versions = versions.mix(SAMTOOLS_FAIDX.out.versions) emit: - dict - fai - gens_interval_list - mutect2_target_bed - versions + dict // channel: [mandatory] [ val(meta), path(dict) ] + fai // channel: [mandatory] [ val(meta), path(fai) ] + gens_interval_list // channel: [mandatory] [ val(meta), path(gens_interval_list) ] + mutect2_target_bed // channel: [mandatory] [ val(meta), path(mutect2_target_bed) ] + versions // channel: path(versions.yml) } From 6298321bfea536814a66697a1710a24a7eca5871 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 27 May 2025 11:33:01 +0200 Subject: [PATCH 190/234] out of this PR scope --- .../{createpanelrefs/main.nf => createpanelrefs.nf} | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) rename workflows/{createpanelrefs/main.nf => createpanelrefs.nf} (91%) diff --git a/workflows/createpanelrefs/main.nf b/workflows/createpanelrefs.nf similarity index 91% rename from workflows/createpanelrefs/main.nf rename to workflows/createpanelrefs.nf index b71021e..50e3338 100644 --- a/workflows/createpanelrefs/main.nf +++ b/workflows/createpanelrefs.nf @@ -4,11 +4,11 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { BAM_CREATE_SOM_PON_GATK } from '../../subworkflows/nf-core/bam_create_som_pon_gatk' -include { CNVKIT_BATCH } from '../../modules/nf-core/cnvkit/batch' -include { GENS_PON } from '../../subworkflows/local/gens_pon' -include { GERMLINECNVCALLER_COHORT } from '../../subworkflows/local/germlinecnvcaller_cohort' -include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view' +include { BAM_CREATE_SOM_PON_GATK } from '../subworkflows/nf-core/bam_create_som_pon_gatk' 
+include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch' +include { GENS_PON } from '../subworkflows/local/gens_pon' +include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvcaller_cohort' +include { SAMTOOLS_VIEW } from '../modules/nf-core/samtools/view' workflow CREATEPANELREFS { take: From ebcd7c57fdebe5e6afd1e12b014ab5abd9167f0d Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 27 May 2025 12:13:15 +0200 Subject: [PATCH 191/234] simplify logic --- subworkflows/local/prepare_genome/main.nf | 46 ++++++++++++----------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 95eecfc..6d899f4 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -20,41 +20,45 @@ workflow PREPARE_GENOME { mutect2_target_bed = Channel.empty() versions = Channel.empty() - // Only run GATK4_CREATESEQUENCEDICTIONARY and generate dict if no user_dict is provided + // If more than one file, then it means that the user has provided a dict file + // So we can pass out a null channel and GATK4_CREATESEQUENCEDICTIONARY won't be run fasta_for_dict = fasta - .join(user_dict, remainder: true) - .filter { _meta, _fasta, dict_ -> !dict_ } - .map { meta, fasta_, _dict -> [meta, fasta_] } + .mix(user_dict) + .filter { _meta, files -> !files[1] } GATK4_CREATESEQUENCEDICTIONARY(fasta_for_dict) dict = user_dict.mix(GATK4_CREATESEQUENCEDICTIONARY.out.dict).collect() - // Only run SAMTOOLS_FAIDX and generate fai if no user_fai is provided + // If more than one file, then it means that the user has provided a fai file + // So we can pass out a null channel and SAMTOOLS_FAIDX won't be run fasta_for_fai = fasta - .join(user_fai, remainder: true) - .filter { _meta, _fasta, fai_ -> !fai_ } - .map { meta, fasta_, _fai -> [meta, fasta_] } + .mix(user_fai) + .groupTuple() + .filter { _meta, files -> !files[1] } 
SAMTOOLS_FAIDX(fasta_for_fai, [[:], []]) fai = user_fai.mix(SAMTOOLS_FAIDX.out.fai).collect() - // Only run GATK4_PREPROCESSINTERVALS_GENS and generate gens_interval_list if no user_gens_interval_list is provided + // If more than one file, then it means that the user has provided an interval list file + // So we can pass out a null channel and GATK4_PREPROCESSINTERVALS_GENS won't be run + fasta_for_interval_list = fasta - .join(user_gens_interval_list, remainder: true) - .filter { _meta, _fasta, interval_list_ -> !interval_list_ } - .map { meta, fasta_, _interval_list -> [meta, fasta_] } + .mix(user_gens_interval_list) + .groupTuple() + .filter { _meta, files -> (tools.split(',').contains('gens') && !files[1]) } GATK4_PREPROCESSINTERVALS_GENS(fasta_for_interval_list, fai.collect(), dict.collect(), [[:], []], [[:], []]) gens_interval_list = user_gens_interval_list.mix(GATK4_PREPROCESSINTERVALS_GENS.out.interval_list).collect() - // Only run BUILD_INTERVALS and generate mutect2_target_bed if no user_mutect2_target_bed is provided + // If more than one file, then it means that the user has provided a fai file + // So we can pass out a null channel and SAMTOOLS_FAIDX won't be run fai_for_intervals = fai - .join(user_mutect2_target_bed, remainder: true) - .filter { _meta, _fai, mutect2_target_bed_ -> !mutect2_target_bed_ } - .map { meta, fai_, _mutect2_target_bed -> [meta, fai_] } + .mix(user_mutect2_target_bed) + .groupTuple() + .filter { _meta, files -> (tools.split(',').contains('mutect2') && !files[1]) } BUILD_INTERVALS(fai_for_intervals, [], false) @@ -66,9 +70,9 @@ workflow PREPARE_GENOME { versions = versions.mix(SAMTOOLS_FAIDX.out.versions) emit: - dict // channel: [mandatory] [ val(meta), path(dict) ] - fai // channel: [mandatory] [ val(meta), path(fai) ] - gens_interval_list // channel: [mandatory] [ val(meta), path(gens_interval_list) ] - mutect2_target_bed // channel: [mandatory] [ val(meta), path(mutect2_target_bed) ] - versions // channel: 
path(versions.yml) + dict // channel: [ val(meta), path(dict) ] + fai // channel: [ val(meta), path(fai) ] + gens_interval_list // channel: [ val(meta), path(gens_interval_list) ] + mutect2_target_bed // channel: [ val(meta), path(mutect2_target_bed) ] + versions // channel: [ path(versions.yml)] } From b97806d5520109731a551668a42ef19017aa5b9d Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 27 May 2025 13:27:51 +0200 Subject: [PATCH 192/234] code polish --- .github/workflows/awsfulltest.yml | 3 --- assets/methods_description_template.yml | 2 -- conf/base.config | 8 -------- nextflow_schema.json | 9 +++++++-- subworkflows/local/prepare_genome/main.nf | 18 +++++++++--------- 5 files changed, 16 insertions(+), 24 deletions(-) diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 408cf7f..948a127 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -24,9 +24,6 @@ jobs: - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 - # TODO nf-core: You can customise AWS full pipeline tests as required - # Add full size test data (but still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 84155f8..76fa009 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,8 +3,6 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/createpanelrefs Methods Description" section_href: "https://github.com/nf-core/createpanelrefs" plot_type: "html" -## TODO nf-core: Update the HTML below to your preferred methods description, e.g. 
add publication citation for this pipeline -## You inject any metadata in the Nextflow '${workflow}' object data: |

Methods

Data was processed using nf-core/createpanelrefs v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

diff --git a/conf/base.config b/conf/base.config index 0631dea..3188891 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,7 +10,6 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { 1 * task.attempt } memory = { 6.GB * task.attempt } time = { 4.h * task.attempt } @@ -19,13 +18,6 @@ process { maxRetries = 1 maxErrors = '-1' - // Process-specific resource requirements - // NOTE - Please try and reuse the labels below as much as possible. - // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. - // If possible, it would be nice to keep the same label naming convention when - // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. - // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { 1 } memory = { 6.GB * task.attempt } diff --git a/nextflow_schema.json b/nextflow_schema.json index a25a290..11c517e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -98,7 +98,11 @@ "default": "", "properties": { "cnvkit_targets": { - "type": "string" + "type": "string", + "format": "path", + "fa_icon": "fas fa-file", + "description": "Path to directory for target file.", + "help_text": "Specify the path to the target file for CNVkit." } } }, @@ -109,7 +113,8 @@ "default": "", "properties": { "mutect2_pon_name": { - "type": "string" + "type": "string", + "description": "Name for panel of normals." 
} } }, diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 6d899f4..41aa104 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -20,18 +20,19 @@ workflow PREPARE_GENOME { mutect2_target_bed = Channel.empty() versions = Channel.empty() - // If more than one file, then it means that the user has provided a dict file - // So we can pass out a null channel and GATK4_CREATESEQUENCEDICTIONARY won't be run + // If a user_dict is provided, no fasta will be used to generate a dict + // Otherwise, GATK4_CREATESEQUENCEDICTIONARY will be run to generate a dict fasta_for_dict = fasta .mix(user_dict) + .groupTuple() .filter { _meta, files -> !files[1] } GATK4_CREATESEQUENCEDICTIONARY(fasta_for_dict) dict = user_dict.mix(GATK4_CREATESEQUENCEDICTIONARY.out.dict).collect() - // If more than one file, then it means that the user has provided a fai file - // So we can pass out a null channel and SAMTOOLS_FAIDX won't be run + // If a user_fai is provided, no fasta will be used to generate a fai + // Otherwise, SAMTOOLS_FAIDX will be run to generate a fai fasta_for_fai = fasta .mix(user_fai) .groupTuple() @@ -41,9 +42,8 @@ workflow PREPARE_GENOME { fai = user_fai.mix(SAMTOOLS_FAIDX.out.fai).collect() - // If more than one file, then it means that the user has provided an interval list file - // So we can pass out a null channel and GATK4_PREPROCESSINTERVALS_GENS won't be run - + // If a user_gens_interval_list is provided or if gens is not a specified tools, no fasta will be used to generate an interval list + // Otherwise, GATK4_PREPROCESSINTERVALS_GENS will be run to generate an interval list fasta_for_interval_list = fasta .mix(user_gens_interval_list) .groupTuple() @@ -53,8 +53,8 @@ workflow PREPARE_GENOME { gens_interval_list = user_gens_interval_list.mix(GATK4_PREPROCESSINTERVALS_GENS.out.interval_list).collect() - // If more than one file, then it means that the user has 
provided a fai file - // So we can pass out a null channel and SAMTOOLS_FAIDX won't be run + // If a user_mutect2_target_bed is provided or if mutect2 is not a specified tools, no fai will be used to generate a target bed + // Otherwise, BUILD_INTERVALS will be run to generate a target bed fai_for_intervals = fai .mix(user_mutect2_target_bed) .groupTuple() From 7724cdb262a722e638d1d44606749adacee3fc61 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 27 May 2025 14:10:47 +0200 Subject: [PATCH 193/234] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c3a57ae..2ab03a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n ### `Fixed` - [#50](https://github.com/nf-core/createpanelrefs/pull/50) - Fix mutect2 that wasn't working without a bed file +- [#53](https://github.com/nf-core/createpanelrefs/pull/53) - Minor syntax fixes due to [#50](https://github.com/nf-core/createpanelrefs/pull/50) ### `Dependencies` From 77cd2900b6169ed46bfb665ff385363791ff1a6d Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Thu, 5 Jun 2025 17:47:12 +0200 Subject: [PATCH 194/234] Update nf-test.config --- nf-test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nf-test.config b/nf-test.config index ceddf62..4a258d9 100644 --- a/nf-test.config +++ b/nf-test.config @@ -9,7 +9,7 @@ config { configFile "tests/nextflow.config" // ignore tests coming from the nf-core/modules repo - ignore 'modules/nf-core/**/tests', 'subworkflows/nf-core/**/tests' + ignore 'modules/nf-core/**/tests/*', 'subworkflows/nf-core/**/tests/*' // run all test with defined profile(s) from the main nextflow.config profile "test" From f9a145a3d66e8457a0caf89f477e08ac23eabe63 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 12 Jun 2025 11:06:13 +0200 Subject: [PATCH 195/234] update subworkflows --- modules.json | 38 +- 
modules/nf-core/cnvkit/batch/environment.yml | 6 +- modules/nf-core/cnvkit/batch/tests/tags.yml | 2 - .../gatk4/annotateintervals/environment.yml | 6 +- .../nf-core/gatk4/annotateintervals/main.nf | 4 +- .../annotateintervals/tests/main.nf.test.snap | 58 +-- .../gatk4/annotateintervals/tests/tags.yml | 2 - .../gatk4/bedtointervallist/environment.yml | 6 +- .../nf-core/gatk4/bedtointervallist/main.nf | 6 +- .../bedtointervallist/tests/main.nf.test.snap | 10 +- .../gatk4/bedtointervallist/tests/tags.yml | 2 - .../gatk4/collectreadcounts/environment.yml | 6 +- .../nf-core/gatk4/collectreadcounts/main.nf | 6 +- .../collectreadcounts/tests/main.nf.test.snap | 20 +- .../environment.yml | 6 +- .../createreadcountpanelofnormals/main.nf | 5 +- .../tests/main.nf.test | 102 ++++++ .../tests/main.nf.test.snap | 48 +++ .../tests/nextflow.config | 14 + .../createsequencedictionary/environment.yml | 6 +- .../gatk4/createsequencedictionary/main.nf | 6 +- .../tests/main.nf.test.snap | 20 +- .../createsequencedictionary/tests/tags.yml | 2 - .../environment.yml | 6 +- .../gatk4/createsomaticpanelofnormals/main.nf | 18 +- .../createsomaticpanelofnormals/meta.yml | 2 +- .../tests/main.nf.test | 90 +++++ .../tests/main.nf.test.snap | 30 ++ .../determinegermlinecontigploidy/README.md | 9 - .../environment.yml | 9 + .../determinegermlinecontigploidy/main.nf | 15 +- .../tests/main.nf.test | 105 ++++++ .../tests/main.nf.test.snap | 77 ++++ .../tests/nextflow.config | 8 + .../gatk4/filterintervals/environment.yml | 6 +- modules/nf-core/gatk4/filterintervals/main.nf | 6 +- .../filterintervals/tests/main.nf.test.snap | 10 +- .../gatk4/genomicsdbimport/environment.yml | 6 +- .../nf-core/gatk4/genomicsdbimport/main.nf | 6 +- .../nf-core/gatk4/genomicsdbimport/meta.yml | 2 +- .../genomicsdbimport/tests/main.nf.test.snap | 34 +- .../gatk4/genomicsdbimport/tests/tags.yml | 3 - .../nf-core/gatk4/germlinecnvcaller/README.md | 9 - .../gatk4/germlinecnvcaller/environment.yml | 9 + 
.../nf-core/gatk4/germlinecnvcaller/main.nf | 15 +- .../germlinecnvcaller/tests/main.nf.test | 126 +++++++ .../germlinecnvcaller/tests/main.nf.test.snap | 55 +++ .../germlinecnvcaller/tests/nextflow.config | 11 + .../gatk4/indexfeaturefile/environment.yml | 6 +- .../nf-core/gatk4/indexfeaturefile/main.nf | 4 +- .../indexfeaturefile/tests/main.nf.test.snap | 36 +- .../gatk4/indexfeaturefile/tests/tags.yml | 2 - .../gatk4/intervallisttools/environment.yml | 6 +- .../nf-core/gatk4/intervallisttools/main.nf | 6 +- .../intervallisttools/tests/main.nf.test.snap | 20 +- .../gatk4/intervallisttools/tests/tags.yml | 2 - modules/nf-core/gatk4/mutect2/environment.yml | 6 +- modules/nf-core/gatk4/mutect2/main.nf | 10 +- .../nf-core/gatk4/mutect2/tests/f1r2.config | 3 - .../nf-core/gatk4/mutect2/tests/main.nf.test | 65 ++-- .../gatk4/mutect2/tests/main.nf.test.snap | 219 ++++++++---- .../nf-core/gatk4/mutect2/tests/mito.config | 3 - .../gatk4/mutect2/tests/nextflow.config | 5 + .../nf-core/gatk4/mutect2/tests/pair.config | 3 - modules/nf-core/gatk4/mutect2/tests/tags.yml | 2 - .../gatk4/preprocessintervals/environment.yml | 6 +- .../nf-core/gatk4/preprocessintervals/main.nf | 6 +- .../tests/main.nf.test.snap | 10 +- modules/nf-core/multiqc/environment.yml | 4 +- modules/nf-core/multiqc/main.nf | 4 +- .../nf-core/multiqc/tests/main.nf.test.snap | 18 +- modules/nf-core/multiqc/tests/tags.yml | 2 - modules/nf-core/samtools/faidx/main.nf | 13 +- modules/nf-core/samtools/faidx/meta.yml | 20 +- .../nf-core/samtools/faidx/tests/main.nf.test | 107 +++++- .../samtools/faidx/tests/main.nf.test.snap | 336 ++++++++++++++++-- modules/nf-core/samtools/faidx/tests/tags.yml | 2 - modules/nf-core/samtools/index/tests/tags.yml | 2 - subworkflows/local/prepare_genome/main.nf | 2 +- .../bam_create_som_pon_gatk/tests/tags.yml | 2 - tests/default.nf.test.snap | 6 +- tests/gens_pon.nf.test.snap | 12 +- 82 files changed, 1593 insertions(+), 405 deletions(-) delete mode 100644 
modules/nf-core/cnvkit/batch/tests/tags.yml delete mode 100644 modules/nf-core/gatk4/annotateintervals/tests/tags.yml delete mode 100644 modules/nf-core/gatk4/bedtointervallist/tests/tags.yml create mode 100644 modules/nf-core/gatk4/createreadcountpanelofnormals/tests/main.nf.test create mode 100644 modules/nf-core/gatk4/createreadcountpanelofnormals/tests/main.nf.test.snap create mode 100644 modules/nf-core/gatk4/createreadcountpanelofnormals/tests/nextflow.config delete mode 100644 modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml create mode 100644 modules/nf-core/gatk4/createsomaticpanelofnormals/tests/main.nf.test create mode 100644 modules/nf-core/gatk4/createsomaticpanelofnormals/tests/main.nf.test.snap delete mode 100644 modules/nf-core/gatk4/determinegermlinecontigploidy/README.md create mode 100644 modules/nf-core/gatk4/determinegermlinecontigploidy/environment.yml create mode 100644 modules/nf-core/gatk4/determinegermlinecontigploidy/tests/main.nf.test create mode 100644 modules/nf-core/gatk4/determinegermlinecontigploidy/tests/main.nf.test.snap create mode 100644 modules/nf-core/gatk4/determinegermlinecontigploidy/tests/nextflow.config delete mode 100644 modules/nf-core/gatk4/genomicsdbimport/tests/tags.yml delete mode 100644 modules/nf-core/gatk4/germlinecnvcaller/README.md create mode 100644 modules/nf-core/gatk4/germlinecnvcaller/environment.yml create mode 100644 modules/nf-core/gatk4/germlinecnvcaller/tests/main.nf.test create mode 100644 modules/nf-core/gatk4/germlinecnvcaller/tests/main.nf.test.snap create mode 100644 modules/nf-core/gatk4/germlinecnvcaller/tests/nextflow.config delete mode 100644 modules/nf-core/gatk4/indexfeaturefile/tests/tags.yml delete mode 100644 modules/nf-core/gatk4/intervallisttools/tests/tags.yml delete mode 100644 modules/nf-core/gatk4/mutect2/tests/f1r2.config delete mode 100644 modules/nf-core/gatk4/mutect2/tests/mito.config create mode 100644 modules/nf-core/gatk4/mutect2/tests/nextflow.config delete 
mode 100644 modules/nf-core/gatk4/mutect2/tests/pair.config delete mode 100644 modules/nf-core/gatk4/mutect2/tests/tags.yml delete mode 100644 modules/nf-core/multiqc/tests/tags.yml delete mode 100644 modules/nf-core/samtools/faidx/tests/tags.yml delete mode 100644 modules/nf-core/samtools/index/tests/tags.yml delete mode 100644 subworkflows/nf-core/bam_create_som_pon_gatk/tests/tags.yml diff --git a/modules.json b/modules.json index ff521c2..9c0ebdd 100644 --- a/modules.json +++ b/modules.json @@ -7,77 +7,77 @@ "nf-core": { "cnvkit/batch": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "gatk4/annotateintervals": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "gatk4/bedtointervallist": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "gatk4/collectreadcounts": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "gatk4/createreadcountpanelofnormals": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "gatk4/createsequencedictionary": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "gatk4/createsomaticpanelofnormals": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["bam_create_som_pon_gatk"] }, "gatk4/determinegermlinecontigploidy": { "branch": "master", - "git_sha": 
"d6dd4ddea1663edbf15f40e089cc16db96e75f1d", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "gatk4/filterintervals": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "gatk4/genomicsdbimport": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["bam_create_som_pon_gatk"] }, "gatk4/germlinecnvcaller": { "branch": "master", - "git_sha": "d6dd4ddea1663edbf15f40e089cc16db96e75f1d", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "gatk4/indexfeaturefile": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "gatk4/intervallisttools": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "gatk4/mutect2": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "git_sha": "a97cba262e9367734e435dc07d2e3b7d6121ef3e", "installed_by": ["bam_create_som_pon_gatk"] }, "gatk4/preprocessintervals": { "branch": "master", - "git_sha": "08108058ea36a63f141c25c4e75f9f872a5b2296", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "gawk": { @@ -87,17 +87,17 @@ }, "multiqc": { "branch": "master", - "git_sha": "f0719ae309075ae4a291533883847c3f7c441dad", + "git_sha": "471cf3ca1617271b9b6fea09ea2ebdee78b874de", "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", - "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "samtools/index": { "branch": "master", - "git_sha": 
"b13f07be4c508d6ff6312d354d09f2493243e208", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["modules"] }, "samtools/view": { @@ -111,7 +111,7 @@ "nf-core": { "bam_create_som_pon_gatk": { "branch": "master", - "git_sha": "b0783b07beb65cac505fa6202e8f670437637b45", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["subworkflows"] }, "utils_nextflow_pipeline": { diff --git a/modules/nf-core/cnvkit/batch/environment.yml b/modules/nf-core/cnvkit/batch/environment.yml index 5d79360..a2466da 100644 --- a/modules/nf-core/cnvkit/batch/environment.yml +++ b/modules/nf-core/cnvkit/batch/environment.yml @@ -1,8 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - bioconda::cnvkit=0.9.10 - - bioconda::htslib=1.17 # Matched with the container - - bioconda::samtools=1.17 # Matched with the container + - bioconda::htslib=1.17 + - bioconda::samtools=1.17 diff --git a/modules/nf-core/cnvkit/batch/tests/tags.yml b/modules/nf-core/cnvkit/batch/tests/tags.yml deleted file mode 100644 index 1c8565c..0000000 --- a/modules/nf-core/cnvkit/batch/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -cnvkit/batch: - - "modules/nf-core/cnvkit/batch/**" diff --git a/modules/nf-core/gatk4/annotateintervals/environment.yml b/modules/nf-core/gatk4/annotateintervals/environment.yml index 55993f4..b562b72 100644 --- a/modules/nf-core/gatk4/annotateintervals/environment.yml +++ b/modules/nf-core/gatk4/annotateintervals/environment.yml @@ -1,5 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::gatk4=4.5.0.0 + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/annotateintervals/main.nf 
b/modules/nf-core/gatk4/annotateintervals/main.nf index dfe9dc2..1f02637 100644 --- a/modules/nf-core/gatk4/annotateintervals/main.nf +++ b/modules/nf-core/gatk4/annotateintervals/main.nf @@ -4,8 +4,8 @@ process GATK4_ANNOTATEINTERVALS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" input: tuple val(meta), path(intervals) diff --git a/modules/nf-core/gatk4/annotateintervals/tests/main.nf.test.snap b/modules/nf-core/gatk4/annotateintervals/tests/main.nf.test.snap index 749f6fb..3b06f09 100644 --- a/modules/nf-core/gatk4/annotateintervals/tests/main.nf.test.snap +++ b/modules/nf-core/gatk4/annotateintervals/tests/main.nf.test.snap @@ -12,7 +12,7 @@ ] ], "1": [ - "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" + "versions.yml:md5,6e1e1caad2a19737e858bc743284cbb2" ], "annotated_intervals": [ [ @@ -24,15 +24,15 @@ ] ], "versions": [ - "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" + "versions.yml:md5,6e1e1caad2a19737e858bc743284cbb2" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.0" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-02-13T16:07:36.32193" + "timestamp": "2024-10-31T10:29:29.006068994" }, "homo_sapiens genome [interval_list]": { "content": [ @@ -47,7 +47,7 @@ ] ], "1": [ - "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" + "versions.yml:md5,6e1e1caad2a19737e858bc743284cbb2" ], "annotated_intervals": [ [ @@ -59,15 +59,15 @@ ] ], "versions": [ - "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" + "versions.yml:md5,6e1e1caad2a19737e858bc743284cbb2" ] } ], "meta": { - 
"nf-test": "0.8.4", - "nextflow": "23.10.0" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-02-13T16:08:12.533176" + "timestamp": "2024-10-31T10:30:08.763305448" }, "homo_sapiens genome multi-interval [bed]": { "content": [ @@ -82,7 +82,7 @@ ] ], "1": [ - "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" + "versions.yml:md5,6e1e1caad2a19737e858bc743284cbb2" ], "annotated_intervals": [ [ @@ -94,15 +94,15 @@ ] ], "versions": [ - "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" + "versions.yml:md5,6e1e1caad2a19737e858bc743284cbb2" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.0" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-02-13T16:07:54.643276" + "timestamp": "2024-10-31T10:29:48.403638052" }, "homo_sapiens genome [interval_list] mappable_regions": { "content": [ @@ -117,7 +117,7 @@ ] ], "1": [ - "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" + "versions.yml:md5,6e1e1caad2a19737e858bc743284cbb2" ], "annotated_intervals": [ [ @@ -129,28 +129,28 @@ ] ], "versions": [ - "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" + "versions.yml:md5,6e1e1caad2a19737e858bc743284cbb2" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.0" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-02-13T16:08:31.311782" + "timestamp": "2024-10-31T10:30:23.256626974" }, "homo_sapiens genome [bed] - stub": { "content": [ [ "test.tsv", - "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" + "versions.yml:md5,6e1e1caad2a19737e858bc743284cbb2" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.0" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-02-13T16:09:04.54745" + "timestamp": "2024-10-31T10:30:52.179735439" }, "homo_sapiens genome [interval_list] duplication_regions": { "content": [ @@ -165,7 +165,7 @@ ] ], "1": [ - "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" + "versions.yml:md5,6e1e1caad2a19737e858bc743284cbb2" ], "annotated_intervals": [ [ @@ -177,14 
+177,14 @@ ] ], "versions": [ - "versions.yml:md5,599cde136feb5d60b2d01b5fbd252fa0" + "versions.yml:md5,6e1e1caad2a19737e858bc743284cbb2" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.0" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-02-13T16:08:49.684766" + "timestamp": "2024-10-31T10:30:37.398809104" } } \ No newline at end of file diff --git a/modules/nf-core/gatk4/annotateintervals/tests/tags.yml b/modules/nf-core/gatk4/annotateintervals/tests/tags.yml deleted file mode 100644 index 74cc19b..0000000 --- a/modules/nf-core/gatk4/annotateintervals/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -gatk4/annotateintervals: - - "modules/nf-core/gatk4/annotateintervals/**" diff --git a/modules/nf-core/gatk4/bedtointervallist/environment.yml b/modules/nf-core/gatk4/bedtointervallist/environment.yml index 55993f4..b562b72 100644 --- a/modules/nf-core/gatk4/bedtointervallist/environment.yml +++ b/modules/nf-core/gatk4/bedtointervallist/environment.yml @@ -1,5 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::gatk4=4.5.0.0 + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/bedtointervallist/main.nf b/modules/nf-core/gatk4/bedtointervallist/main.nf index 68863d6..89960e0 100644 --- a/modules/nf-core/gatk4/bedtointervallist/main.nf +++ b/modules/nf-core/gatk4/bedtointervallist/main.nf @@ -1,11 +1,11 @@ process GATK4_BEDTOINTERVALLIST { tag "$meta.id" - label 'process_medium' + label 'process_single' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" input: tuple val(meta), path(bed) diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap index 48c322f..6936cf9 100644 --- a/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap +++ b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap @@ -11,7 +11,7 @@ ] ], "1": [ - "versions.yml:md5,29a18c36f27584eb5a5f2f5457088b3b" + "versions.yml:md5,6b3aa4d49cc3ba433ecf92e31f155d00" ], "interval_list": [ [ @@ -22,14 +22,14 @@ ] ], "versions": [ - "versions.yml:md5,29a18c36f27584eb5a5f2f5457088b3b" + "versions.yml:md5,6b3aa4d49cc3ba433ecf92e31f155d00" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-03-19T14:20:12.168775" + "timestamp": "2024-10-31T10:37:25.720782902" } } \ No newline at end of file diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml b/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml deleted file mode 100644 index b4d54f1..0000000 --- a/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -gatk4/bedtointervallist: - - "modules/nf-core/gatk4/bedtointervallist/**" diff --git a/modules/nf-core/gatk4/collectreadcounts/environment.yml b/modules/nf-core/gatk4/collectreadcounts/environment.yml index 55993f4..b562b72 100644 --- a/modules/nf-core/gatk4/collectreadcounts/environment.yml +++ b/modules/nf-core/gatk4/collectreadcounts/environment.yml @@ -1,5 +1,9 @@ +--- +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::gatk4=4.5.0.0 + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/collectreadcounts/main.nf b/modules/nf-core/gatk4/collectreadcounts/main.nf index 2c545d2..c742a16 100644 --- a/modules/nf-core/gatk4/collectreadcounts/main.nf +++ b/modules/nf-core/gatk4/collectreadcounts/main.nf @@ -1,11 +1,11 @@ process GATK4_COLLECTREADCOUNTS { tag "$meta.id" - label 'process_medium' + label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" input: tuple val(meta), path(input), path(input_index), path(intervals) diff --git a/modules/nf-core/gatk4/collectreadcounts/tests/main.nf.test.snap b/modules/nf-core/gatk4/collectreadcounts/tests/main.nf.test.snap index 28a3a6e..212f950 100644 --- a/modules/nf-core/gatk4/collectreadcounts/tests/main.nf.test.snap +++ b/modules/nf-core/gatk4/collectreadcounts/tests/main.nf.test.snap @@ -15,7 +15,7 @@ ] ], "2": [ - "versions.yml:md5,ebf23f4ab63948ba97df07035f8d2659" + "versions.yml:md5,5d92970288bee9ff45551e783f7058fb" ], "hdf5": [ @@ -30,15 +30,15 @@ ] ], "versions": [ - "versions.yml:md5,ebf23f4ab63948ba97df07035f8d2659" + "versions.yml:md5,5d92970288bee9ff45551e783f7058fb" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-08-26T12:58:19.610687" + "timestamp": 
"2024-10-31T10:44:48.133712731" }, "test-gatk4-collectreadcounts-hdf5": { "content": [ @@ -69,7 +69,7 @@ ] ], "2": [ - "versions.yml:md5,ebf23f4ab63948ba97df07035f8d2659" + "versions.yml:md5,5d92970288bee9ff45551e783f7058fb" ], "hdf5": [ @@ -84,14 +84,14 @@ ] ], "versions": [ - "versions.yml:md5,ebf23f4ab63948ba97df07035f8d2659" + "versions.yml:md5,5d92970288bee9ff45551e783f7058fb" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-08-26T12:58:07.500024" + "timestamp": "2024-10-31T10:44:24.624195466" } } \ No newline at end of file diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml b/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml index 55993f4..b562b72 100644 --- a/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml @@ -1,5 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::gatk4=4.5.0.0 + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf b/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf index 332f9d9..b754b05 100644 --- a/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf @@ -4,8 +4,8 @@ process GATK4_CREATEREADCOUNTPANELOFNORMALS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" input: tuple val(meta), path(counts) @@ -42,7 +42,6 @@ process GATK4_CREATEREADCOUNTPANELOFNORMALS { """ stub: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.hdf5 diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/tests/main.nf.test b/modules/nf-core/gatk4/createreadcountpanelofnormals/tests/main.nf.test new file mode 100644 index 0000000..f16c2df --- /dev/null +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/tests/main.nf.test @@ -0,0 +1,102 @@ +nextflow_process { + + name "Test Process GATK4_CREATEREADCOUNTPANELOFNORMALS" + script "../main.nf" + config "./nextflow.config" + process "GATK4_CREATEREADCOUNTPANELOFNORMALS" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/preprocessintervals" + tag "gatk4/collectreadcounts" + tag "gatk4/createreadcountpanelofnormals" + + setup { + run("GATK4_PREPROCESSINTERVALS") { + script "../../preprocessintervals/main.nf" + process { + """ + fasta = Channel.of([ [ id:'test' ], file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta",checkIfExists: true)]).collect() + fai = Channel.of([ [ id:'test' ], file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.fai",checkIfExists: true)]).collect() + dict = Channel.of([ [ id:'test' ], file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.dict",checkIfExists: true)]).collect() + + input[0] = fasta + input[1] = fai + input[2] = dict + input[3] = [[],[]] + input[4] = [[],[]] + """ + } + } + run("GATK4_COLLECTREADCOUNTS") { + script "../../collectreadcounts/main.nf" + process { 
+ """ + intervals = GATK4_PREPROCESSINTERVALS.out.interval_list.map{ meta, list -> list} + input[0] = Channel.of([ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true), + ], + [ + [ id:'test2', single_end:false ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai", checkIfExists: true), + ]) + .combine(intervals) + input[1] = Channel.of([ [ id:'test' ], file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta",checkIfExists: true)]).collect() + input[2] = Channel.of([ [ id:'test' ], file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.fai",checkIfExists: true)]).collect() + input[3] = Channel.of([ [ id:'test' ], file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.dict",checkIfExists: true)]).collect() + """ + } + } + } + + test("homo sapiens - bam") { + when { + process { + """ + input[0] = GATK4_COLLECTREADCOUNTS.out.tsv + .map({ meta, tsv -> [[id:'test'], tsv ] }) + .groupTuple() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.pon[0][1]).name + ).match() } + ) + } + + } + + test("homo sapiens - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = GATK4_COLLECTREADCOUNTS.out.tsv + .map({ meta, tsv -> [[id:'test'], tsv ] }) + .groupTuple() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git 
a/modules/nf-core/gatk4/createreadcountpanelofnormals/tests/main.nf.test.snap b/modules/nf-core/gatk4/createreadcountpanelofnormals/tests/main.nf.test.snap new file mode 100644 index 0000000..630cab6 --- /dev/null +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/tests/main.nf.test.snap @@ -0,0 +1,48 @@ +{ + "homo sapiens - bam": { + "content": [ + [ + "versions.yml:md5,3b00a427858f3b991cc03fe6b467fe9d" + ], + "test.hdf5" + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-06T09:28:03.932486659" + }, + "homo sapiens - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.hdf5:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,3b00a427858f3b991cc03fe6b467fe9d" + ], + "pon": [ + [ + { + "id": "test" + }, + "test.hdf5:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,3b00a427858f3b991cc03fe6b467fe9d" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-14T10:57:07.220349234" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/tests/nextflow.config b/modules/nf-core/gatk4/createreadcountpanelofnormals/tests/nextflow.config new file mode 100644 index 0000000..90cfd3c --- /dev/null +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/tests/nextflow.config @@ -0,0 +1,14 @@ +docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64 -e "HOME=${HOME}" -v /etc/passwd:/etc/passwd:ro -v /etc/shadow:/etc/shadow:ro -v /etc/group:/etc/group:ro -v $HOME:$HOME' +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: GATK4_COLLECTREADCOUNTS { + ext.args = "--format TSV --interval-merging-rule OVERLAPPING_ONLY" + } + + withName: GATK4_CREATEREADCOUNTPANELOFNORMALS { + ext.args = "--minimum-interval-median-percentile 1.0 --number-of-eigensamples 2" + } + +} diff --git 
a/modules/nf-core/gatk4/createsequencedictionary/environment.yml b/modules/nf-core/gatk4/createsequencedictionary/environment.yml index 55993f4..b562b72 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/environment.yml +++ b/modules/nf-core/gatk4/createsequencedictionary/environment.yml @@ -1,5 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::gatk4=4.5.0.0 + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/createsequencedictionary/main.nf b/modules/nf-core/gatk4/createsequencedictionary/main.nf index c7f1d75..998622a 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/main.nf +++ b/modules/nf-core/gatk4/createsequencedictionary/main.nf @@ -1,11 +1,11 @@ process GATK4_CREATESEQUENCEDICTIONARY { tag "$fasta" - label 'process_medium' + label 'process_single' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap index 16735f9..e8a600f 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap +++ b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap @@ -11,7 +11,7 @@ ] ], "1": [ - "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6" + "versions.yml:md5,e993b2c99f7f6b0fcd8428de15c61439" ], "dict": [ [ @@ -22,15 +22,15 @@ ] ], "versions": [ - "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6" + "versions.yml:md5,e993b2c99f7f6b0fcd8428de15c61439" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.0" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-05-16T10:16:16.34453" + "timestamp": "2024-10-31T10:51:56.155954077" }, "sarscov2 - fasta": { "content": [ @@ -44,7 +44,7 @@ ] ], "1": [ - "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6" + "versions.yml:md5,e993b2c99f7f6b0fcd8428de15c61439" ], "dict": [ [ @@ -55,14 +55,14 @@ ] ], "versions": [ - "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6" + "versions.yml:md5,e993b2c99f7f6b0fcd8428de15c61439" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.0" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-05-16T13:58:25.822068" + "timestamp": "2024-10-31T10:51:45.562993875" } } \ No newline at end of file diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml b/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml deleted file mode 100644 index 035c5e4..0000000 --- 
a/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -gatk4/createsequencedictionary: - - "modules/nf-core/gatk4/createsequencedictionary/**" diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml b/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml index 55993f4..b562b72 100644 --- a/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml @@ -1,5 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::gatk4=4.5.0.0 + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf b/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf index 27a50dc..fb6fdf7 100644 --- a/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf @@ -4,8 +4,8 @@ process GATK4_CREATESOMATICPANELOFNORMALS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" input: tuple val(meta), path(genomicsdb) @@ -45,4 +45,18 @@ process GATK4_CREATESOMATICPANELOFNORMALS { gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "${prefix}" | gzip -c > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + } diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml b/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml index 45d88c1..707c207 100644 --- a/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml @@ -1,5 +1,5 @@ name: gatk4_createsomaticpanelofnormals -description: Create a panel of normals contraining germline and artifactual sites +description: Create a panel of normals constraining germline and artifactual sites for use with mutect2. 
keywords: - createsomaticpanelofnormals diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/tests/main.nf.test b/modules/nf-core/gatk4/createsomaticpanelofnormals/tests/main.nf.test new file mode 100644 index 0000000..95cb572 --- /dev/null +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/tests/main.nf.test @@ -0,0 +1,90 @@ +nextflow_process { + + name "Test Process GATK4_CREATESOMATICPANELOFNORMALS" + script "../main.nf" + process "GATK4_CREATESOMATICPANELOFNORMALS" + + tag "modules" + tag "modules_nfcore" + tag "untar" + tag "gatk4" + tag "gatk4/createsomaticpanelofnormals" + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [[id:'test'], file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz", checkIfExists: true) ] + """ + } + } + } + + test("homo sapiens - genomicsdb") { + + when { + process { + """ + input[0] = UNTAR.out.untar + input[1] = [ [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true) + ] + input[2] = [ [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.fai", checkIfExists: true) + ] + input[3] = [ [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.dict", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.vcf[0][1]).name, + file(process.out.tbi[0][1]).name, + ).match() } + ) + } + + } + + test("homo sapiens - genomicsdb - stub") { + + options "-stub" + + when { + process { + """ + input[0] = UNTAR.out.untar + input[1] = [ [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true) + ] + input[2] = [ [ id:'genome' ], // meta map + 
file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.fai", checkIfExists: true) + ] + input[3] = [ [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.dict", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.vcf[0][1]).name, + file(process.out.tbi[0][1]).name, + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/tests/main.nf.test.snap b/modules/nf-core/gatk4/createsomaticpanelofnormals/tests/main.nf.test.snap new file mode 100644 index 0000000..d4da4f2 --- /dev/null +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/tests/main.nf.test.snap @@ -0,0 +1,30 @@ +{ + "homo sapiens - genomicsdb": { + "content": [ + [ + "versions.yml:md5,672d1b17845a11d8a296d1885f3a9fa4" + ], + "test.vcf.gz", + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T20:27:41.348681175" + }, + "homo sapiens - genomicsdb - stub": { + "content": [ + [ + "versions.yml:md5,672d1b17845a11d8a296d1885f3a9fa4" + ], + "test.vcf.gz", + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-07T20:27:58.84258624" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/README.md b/modules/nf-core/gatk4/determinegermlinecontigploidy/README.md deleted file mode 100644 index c6a4545..0000000 --- a/modules/nf-core/gatk4/determinegermlinecontigploidy/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# Conda is not supported at the moment - -The [bioconda](https://bioconda.github.io/recipes/gatk4/README.html) recipe is not fully working as expected, cf [github issue](https://github.com/broadinstitute/gatk/issues/7811) - -Hence, we are using the docker container provided by the authors of the tool: - -- 
[broadinstitute/gatk](https://hub.docker.com/r/broadinstitute/gatk) - -This image is mirrored on the [nf-core quay.io](https://quay.io/repository/nf-core/gatk) for convenience. diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/environment.yml b/modules/nf-core/gatk4/determinegermlinecontigploidy/environment.yml new file mode 100644 index 0000000..b562b72 --- /dev/null +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf b/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf index fc98e80..6edccf6 100644 --- a/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf @@ -3,8 +3,10 @@ process GATK4_DETERMINEGERMLINECONTIGPLOIDY { tag "$meta.id" label 'process_single' - //Conda is not supported at the moment: https://github.com/broadinstitute/gatk/issues/7811 - container "nf-core/gatk:4.5.0.0" //Biocontainers is missing a package + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" input: tuple val(meta), path(counts), path(bed), path(exclude_beds) @@ -20,10 +22,6 @@ process GATK4_DETERMINEGERMLINECONTIGPLOIDY { task.ext.when == null || task.ext.when script: - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "GATK4_DETERMINEGERMLINECONTIGPLOIDY module does not support Conda. Please use Docker / Singularity / Podman instead." - } def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def intervals = bed ? "--intervals ${bed}" : "" @@ -40,6 +38,7 @@ process GATK4_DETERMINEGERMLINECONTIGPLOIDY { } """ export THEANO_FLAGS="base_compiledir=\$PWD" + export PYTENSOR_FLAGS="base_compiledir=\$PWD" export OMP_NUM_THREADS=${task.cpus} export MKL_NUM_THREADS=${task.cpus} @@ -62,10 +61,6 @@ process GATK4_DETERMINEGERMLINECONTIGPLOIDY { """ stub: - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "GATK4_DETERMINEGERMLINECONTIGPLOIDY module does not support Conda. Please use Docker / Singularity / Podman instead." 
- } prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}-calls diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/tests/main.nf.test b/modules/nf-core/gatk4/determinegermlinecontigploidy/tests/main.nf.test new file mode 100644 index 0000000..a6d0fb7 --- /dev/null +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/tests/main.nf.test @@ -0,0 +1,105 @@ +nextflow_process { + + name "Test Process GATK4_DETERMINEGERMLINECONTIGPLOIDY" + script "../main.nf" + config "./nextflow.config" + process "GATK4_DETERMINEGERMLINECONTIGPLOIDY" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/collectreadcounts" + tag "gatk4/determinegermlinecontigploidy" + + setup { + run("GATK4_COLLECTREADCOUNTS") { + script "../../collectreadcounts/main.nf" + process { + """ + bed = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)) + + ch_input = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true) + ], + [ + [ id:'test2', single_end:false ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai", checkIfExists: true) + ], + ).combine(bed) + + ch_fasta = Channel.of([ [ id:'test' ], file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true)]).collect() + ch_fai = Channel.of([ [ id:'test' ], file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.fai", checkIfExists: true)]).collect() + ch_dict = Channel.of([ [ id:'test' ], 
file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.dict", checkIfExists: true)]).collect() + + input = [ch_input, ch_fasta, ch_fai, ch_dict] + """ + } + } + } + + test("homo sapiens - bam") { + + when { + process { + """ + contig_ploidy_table = file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv", checkIfExists: true) + bed = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)) + + input[0] = GATK4_COLLECTREADCOUNTS.out.tsv + .map({ meta, tsv -> [[id:'test'] , tsv ] }) + .groupTuple() + .combine(bed) + .map({ meta, counts, bed -> [ meta, counts, bed, [] ]}) + input[1] = [[],[]] + input[2] = contig_ploidy_table + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.calls[0][1]).list().sort().collect { path -> file(path).name }, + file(process.out.model[0][1]).list().sort().collect { path -> file(path).name }, + ).match() } + ) + } + + } + + test("homo sapiens - bam - stub") { + + options "-stub" + + when { + process { + """ + contig_ploidy_table = file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv", checkIfExists: true) + bed = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)) + + input[0] = GATK4_COLLECTREADCOUNTS.out.tsv + .map({ meta, tsv -> [[id:'test'] , tsv ] }) + .groupTuple() + .combine(bed) + .map({ meta, counts, bed -> [ meta, counts, bed, [] ]}) + input[1] = [[],[]] + input[2] = contig_ploidy_table + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/tests/main.nf.test.snap 
b/modules/nf-core/gatk4/determinegermlinecontigploidy/tests/main.nf.test.snap new file mode 100644 index 0000000..83d018e --- /dev/null +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/tests/main.nf.test.snap @@ -0,0 +1,77 @@ +{ + "homo sapiens - bam": { + "content": [ + [ + "versions.yml:md5,0dd7026e3a5627b4e3cb464879c9c817" + ], + [ + "SAMPLE_0", + "SAMPLE_1" + ], + [ + "contig_ploidy_prior.tsv", + "gcnvkernel_version.json", + "interval_list.tsv", + "mu_mean_bias_j_interval__.tsv", + "mu_psi_j_log__.tsv", + "ploidy_config.json", + "std_mean_bias_j_interval__.tsv", + "std_psi_j_log__.tsv" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-18T10:16:43.182419213" + }, + "homo sapiens - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test-calls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test-model:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,0dd7026e3a5627b4e3cb464879c9c817" + ], + "calls": [ + [ + { + "id": "test" + }, + "test-calls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "model": [ + [ + { + "id": "test" + }, + "test-model:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,0dd7026e3a5627b4e3cb464879c9c817" + ] + } + ], + "meta": { + "nf-test": "0.9.1", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-06T10:23:35.989186048" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/tests/nextflow.config b/modules/nf-core/gatk4/determinegermlinecontigploidy/tests/nextflow.config new file mode 100644 index 0000000..2767dac --- /dev/null +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: GATK4_COLLECTREADCOUNTS { + ext.args = "--format TSV --interval-merging-rule OVERLAPPING_ONLY" + } + withName: GATK4_DETERMINEGERMLINECONTIGPLOIDY { + ext.args = 
"--interval-merging-rule OVERLAPPING_ONLY" + } +} diff --git a/modules/nf-core/gatk4/filterintervals/environment.yml b/modules/nf-core/gatk4/filterintervals/environment.yml index 55993f4..b562b72 100644 --- a/modules/nf-core/gatk4/filterintervals/environment.yml +++ b/modules/nf-core/gatk4/filterintervals/environment.yml @@ -1,5 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::gatk4=4.5.0.0 + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/filterintervals/main.nf b/modules/nf-core/gatk4/filterintervals/main.nf index bf1f3a0..ada752d 100644 --- a/modules/nf-core/gatk4/filterintervals/main.nf +++ b/modules/nf-core/gatk4/filterintervals/main.nf @@ -1,11 +1,11 @@ process GATK4_FILTERINTERVALS { tag "$meta.id" - label 'process_medium' + label 'process_single' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" input: tuple val(meta), path(intervals) diff --git a/modules/nf-core/gatk4/filterintervals/tests/main.nf.test.snap b/modules/nf-core/gatk4/filterintervals/tests/main.nf.test.snap index c962666..add6095 100644 --- a/modules/nf-core/gatk4/filterintervals/tests/main.nf.test.snap +++ b/modules/nf-core/gatk4/filterintervals/tests/main.nf.test.snap @@ -11,7 +11,7 @@ ] ], "1": [ - "versions.yml:md5,9a445090a815c06982d5deb5ed7d5e30" + "versions.yml:md5,6fbacd816f4d48eae20f1fd423cb4e56" ], "interval_list": [ [ @@ -22,14 +22,14 @@ ] ], "versions": [ - "versions.yml:md5,9a445090a815c06982d5deb5ed7d5e30" + "versions.yml:md5,6fbacd816f4d48eae20f1fd423cb4e56" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-08-26T12:25:35.933532" + "timestamp": "2024-10-31T11:03:52.645895175" } } \ No newline at end of file diff --git a/modules/nf-core/gatk4/genomicsdbimport/environment.yml b/modules/nf-core/gatk4/genomicsdbimport/environment.yml index 55993f4..b562b72 100644 --- a/modules/nf-core/gatk4/genomicsdbimport/environment.yml +++ b/modules/nf-core/gatk4/genomicsdbimport/environment.yml @@ -1,5 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::gatk4=4.5.0.0 + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/genomicsdbimport/main.nf b/modules/nf-core/gatk4/genomicsdbimport/main.nf index 6f1d4c5..90f1200 100644 --- 
a/modules/nf-core/gatk4/genomicsdbimport/main.nf +++ b/modules/nf-core/gatk4/genomicsdbimport/main.nf @@ -1,11 +1,11 @@ process GATK4_GENOMICSDBIMPORT { tag "$meta.id" - label 'process_medium' + label 'process_single' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" input: tuple val(meta), path(vcf), path(tbi), path(interval_file), val(interval_value), path(wspace) diff --git a/modules/nf-core/gatk4/genomicsdbimport/meta.yml b/modules/nf-core/gatk4/genomicsdbimport/meta.yml index 174ae2e..ba734b2 100644 --- a/modules/nf-core/gatk4/genomicsdbimport/meta.yml +++ b/modules/nf-core/gatk4/genomicsdbimport/meta.yml @@ -38,7 +38,7 @@ input: pattern: "*.interval_list" - interval_value: type: string - description: if an intervals file has not been spcified, the value enetered + description: if an intervals file has not been specified, the value entered here will be used as an interval via the "-L" argument pattern: "example: chr1:1000-10000" - wspace: diff --git a/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap index 55ced0d..cb47a43 100644 --- a/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap +++ b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap @@ -3,14 +3,14 @@ "content": [ "test.interval_list:md5,4c85812ac15fc1cd29711a851d23c0bf", [ - "versions.yml:md5,c1233a04213021aa66599a36e0fb28cc" + "versions.yml:md5,5e722d5d01b9b1a0076ce80d0199b157" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.1", + 
"nextflow": "24.10.0" }, - "timestamp": "2024-07-09T10:42:51.836379" + "timestamp": "2024-10-31T11:09:55.9195126" }, "test_gatk4_genomicsdbimport_create_genomicsdb": { "content": [ @@ -22,14 +22,14 @@ "vidmap.json" ], [ - "versions.yml:md5,c1233a04213021aa66599a36e0fb28cc" + "versions.yml:md5,5e722d5d01b9b1a0076ce80d0199b157" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-09T10:42:36.846239" + "timestamp": "2024-10-31T11:09:25.302350893" }, "test_gatk4_genomicsdbimport_update_genomicsdb": { "content": [ @@ -41,14 +41,14 @@ "vidmap.json" ], [ - "versions.yml:md5,c1233a04213021aa66599a36e0fb28cc" + "versions.yml:md5,5e722d5d01b9b1a0076ce80d0199b157" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-09T10:43:09.00769" + "timestamp": "2024-10-31T11:10:13.206208761" }, "test_gatk4_genomicsdbimport_stub": { "content": [ @@ -68,7 +68,7 @@ ], "3": [ - "versions.yml:md5,c1233a04213021aa66599a36e0fb28cc" + "versions.yml:md5,5e722d5d01b9b1a0076ce80d0199b157" ], "genomicsdb": [ [ @@ -85,14 +85,14 @@ ], "versions": [ - "versions.yml:md5,c1233a04213021aa66599a36e0fb28cc" + "versions.yml:md5,5e722d5d01b9b1a0076ce80d0199b157" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-07-09T10:43:20.921712" + "timestamp": "2024-10-31T11:10:36.510210505" } } \ No newline at end of file diff --git a/modules/nf-core/gatk4/genomicsdbimport/tests/tags.yml b/modules/nf-core/gatk4/genomicsdbimport/tests/tags.yml deleted file mode 100644 index 8a00857..0000000 --- a/modules/nf-core/gatk4/genomicsdbimport/tests/tags.yml +++ /dev/null @@ -1,3 +0,0 @@ -gatk4/genomicsdbimport: - - "modules/nf-core/gatk4/genomicsdbimport/**" - - "modules/nf-core/untar/**" diff --git a/modules/nf-core/gatk4/germlinecnvcaller/README.md 
b/modules/nf-core/gatk4/germlinecnvcaller/README.md deleted file mode 100644 index c6a4545..0000000 --- a/modules/nf-core/gatk4/germlinecnvcaller/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# Conda is not supported at the moment - -The [bioconda](https://bioconda.github.io/recipes/gatk4/README.html) recipe is not fully working as expected, cf [github issue](https://github.com/broadinstitute/gatk/issues/7811) - -Hence, we are using the docker container provided by the authors of the tool: - -- [broadinstitute/gatk](https://hub.docker.com/r/broadinstitute/gatk) - -This image is mirrored on the [nf-core quay.io](https://quay.io/repository/nf-core/gatk) for convenience. diff --git a/modules/nf-core/gatk4/germlinecnvcaller/environment.yml b/modules/nf-core/gatk4/germlinecnvcaller/environment.yml new file mode 100644 index 0000000..b562b72 --- /dev/null +++ b/modules/nf-core/gatk4/germlinecnvcaller/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/germlinecnvcaller/main.nf b/modules/nf-core/gatk4/germlinecnvcaller/main.nf index 90aa7e4..e8afb1f 100644 --- a/modules/nf-core/gatk4/germlinecnvcaller/main.nf +++ b/modules/nf-core/gatk4/germlinecnvcaller/main.nf @@ -2,8 +2,10 @@ process GATK4_GERMLINECNVCALLER { tag "$meta.id" label 'process_single' - //Conda is not supported at the moment: https://github.com/broadinstitute/gatk/issues/7811 - container "nf-core/gatk:4.5.0.0" //Biocontainers is missing a package + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" input: tuple val(meta), path(tsv), path(intervals), path(ploidy), path(model) @@ -18,10 +20,6 @@ process GATK4_GERMLINECNVCALLER { task.ext.when == null || task.ext.when script: - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "GATK4_GERMLINECNVCALLER module does not support Conda. Please use Docker / Singularity / Podman instead." - } def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def intervals_command = intervals ? "--intervals ${intervals}" : "" @@ -38,6 +36,7 @@ process GATK4_GERMLINECNVCALLER { } """ export THEANO_FLAGS="base_compiledir=\$PWD" + export PYTENSOR_FLAGS="base_compiledir=\$PWD" export OMP_NUM_THREADS=${task.cpus} export MKL_NUM_THREADS=${task.cpus} @@ -58,10 +57,6 @@ process GATK4_GERMLINECNVCALLER { """ stub: - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "GATK4_GERMLINECNVCALLER module does not support Conda. Please use Docker / Singularity / Podman instead." 
- } def prefix = task.ext.prefix ?: "${meta.id}" """ mkdir -p ${prefix}-cnv-calls/${prefix}-calls diff --git a/modules/nf-core/gatk4/germlinecnvcaller/tests/main.nf.test b/modules/nf-core/gatk4/germlinecnvcaller/tests/main.nf.test new file mode 100644 index 0000000..d14ef9a --- /dev/null +++ b/modules/nf-core/gatk4/germlinecnvcaller/tests/main.nf.test @@ -0,0 +1,126 @@ +nextflow_process { + + name "Test Process GATK4_GERMLINECNVCALLER" + script "../main.nf" + config "./nextflow.config" + process "GATK4_GERMLINECNVCALLER" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/collectreadcounts" + tag "gatk4/determinegermlinecontigploidy" + tag "gatk4/bedtointervallist" + tag "gatk4/germlinecnvcaller" + + setup { + run("GATK4_COLLECTREADCOUNTS") { + script "../../collectreadcounts/main.nf" + process { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.multi_intervals.bed", checkIfExists: true) + ], + [ + [ id:'test2', single_end:false ], // meta map + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.multi_intervals.bed", checkIfExists: true) + ]) + input[1] = Channel.value([ [ id:'test' ], file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true)]) + input[2] = Channel.value([ [ id:'test' ], file(params.modules_testdata_base_path + 
"genomics/homo_sapiens/genome/genome.fasta.fai", checkIfExists:true)]) + input[3] = Channel.value([ [ id:'test' ], file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.dict", checkIfExists: true)]) + + """ + } + } + run("GATK4_DETERMINEGERMLINECONTIGPLOIDY") { + script "../../determinegermlinecontigploidy/main.nf" + process { + """ + bed = Channel.value(file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.multi_intervals.bed", checkIfExists: true)) + contig_ploidy_table = file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv", checkIfExists:true) + + input[0] = GATK4_COLLECTREADCOUNTS.out.tsv + .map({ meta, tsv -> [[id:'test'], tsv ] }) + .groupTuple() + .combine(bed) + .map({ meta, counts, bed -> [ meta, counts, bed, [] ]}) + input[1] = [[],[]] + input[2] = contig_ploidy_table + """ + } + } + run("GATK4_BEDTOINTERVALLIST") { + script "../../bedtointervallist/main.nf" + process { + """ + input[0] = Channel.of([[ id:'test' ], file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.multi_intervals.bed", checkIfExists: true)]) + input[1] = Channel.value([ [ id:'test' ], file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.dict", checkIfExists: true)]) + """ + } + } + } + + test("homo sapiens - bam") { + when { + process { + """ + input[0] = GATK4_COLLECTREADCOUNTS.out.tsv + .map({ meta, tsv -> [[id:'test'], tsv ] }) + .groupTuple() + .combine(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.calls) + .combine(GATK4_BEDTOINTERVALLIST.out.interval_list) + .map{ meta, counts, meta2, calls, meta3, bed -> [ meta, counts, bed, calls, [] ]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.cohortcalls[0][1]).list().sort().collect { path -> file(path).name }, + file(process.out.cohortmodel[0][1]).list().sort().collect { path -> file(path).name }, + 
).match() } + ) + } + + } + + test("homo sapiens - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = GATK4_COLLECTREADCOUNTS.out.tsv + .map({ meta, tsv -> [[id:'test'], tsv ] }) + .groupTuple() + .combine(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.calls) + .combine(GATK4_BEDTOINTERVALLIST.out.interval_list) + .map{ meta, counts, meta2, calls, meta3, bed -> [ meta, counts, bed, calls, [] ]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.cohortcalls[0][1]).list().sort().collect { path -> file(path).name }, + file(process.out.cohortmodel[0][1]).list().sort().collect { path -> file(path).name }, + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/germlinecnvcaller/tests/main.nf.test.snap b/modules/nf-core/gatk4/germlinecnvcaller/tests/main.nf.test.snap new file mode 100644 index 0000000..bbec501 --- /dev/null +++ b/modules/nf-core/gatk4/germlinecnvcaller/tests/main.nf.test.snap @@ -0,0 +1,55 @@ +{ + "homo sapiens - bam": { + "content": [ + [ + "versions.yml:md5,73418764ce886774da2a240630f45aff" + ], + [ + "SAMPLE_0", + "SAMPLE_1", + "calling_config.json", + "denoising_config.json", + "gcnvkernel_version.json", + "interval_list.tsv" + ], + [ + "calling_config.json", + "denoising_config.json", + "gcnvkernel_version.json", + "interval_list.tsv", + "log_q_tau_tk.tsv", + "mu_W_tu.tsv", + "mu_ard_u_interval__.tsv", + "mu_log_mean_bias_t.tsv", + "mu_psi_t_log__.tsv", + "std_W_tu.tsv", + "std_ard_u_interval__.tsv", + "std_log_mean_bias_t.tsv", + "std_psi_t_log__.tsv" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-18T08:51:49.791400262" + }, + "homo sapiens - bam - stub": { + "content": [ + [ + "versions.yml:md5,73418764ce886774da2a240630f45aff" + ], + [ + + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-18T08:52:19.983061924" + } +} \ No newline at 
end of file diff --git a/modules/nf-core/gatk4/germlinecnvcaller/tests/nextflow.config b/modules/nf-core/gatk4/germlinecnvcaller/tests/nextflow.config new file mode 100644 index 0000000..d1b64bf --- /dev/null +++ b/modules/nf-core/gatk4/germlinecnvcaller/tests/nextflow.config @@ -0,0 +1,11 @@ +process { + withName: 'GATK4_COLLECTREADCOUNTS*' { + ext.args = "--format TSV --interval-merging-rule OVERLAPPING_ONLY" + } + withName: 'GATK4_DETERMINEGERMLINECONTIGPLOIDY' { + ext.args = "--interval-merging-rule OVERLAPPING_ONLY" + } + withName: 'GATK4_GERMLINECNVCALLER' { + ext.args = "--interval-merging-rule OVERLAPPING_ONLY --run-mode COHORT" + } +} diff --git a/modules/nf-core/gatk4/indexfeaturefile/environment.yml b/modules/nf-core/gatk4/indexfeaturefile/environment.yml index 55993f4..b562b72 100644 --- a/modules/nf-core/gatk4/indexfeaturefile/environment.yml +++ b/modules/nf-core/gatk4/indexfeaturefile/environment.yml @@ -1,5 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::gatk4=4.5.0.0 + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/indexfeaturefile/main.nf b/modules/nf-core/gatk4/indexfeaturefile/main.nf index b1a2698..6993537 100644 --- a/modules/nf-core/gatk4/indexfeaturefile/main.nf +++ b/modules/nf-core/gatk4/indexfeaturefile/main.nf @@ -4,8 +4,8 @@ process GATK4_INDEXFEATUREFILE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" input: tuple val(meta), path(feature_file) diff --git a/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test.snap b/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test.snap index fc19333..788b929 100644 --- a/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test.snap +++ b/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test.snap @@ -12,14 +12,14 @@ "test_gatk4_indexfeaturefile_vcf": { "content": [ [ - "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + "versions.yml:md5,3a4023239bad8505fdf37002915f8a7f" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.2" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-05-22T18:57:20.602472" + "timestamp": "2024-10-31T11:22:21.619530121" }, "geneome.bed.idx": { "content": [ @@ -53,7 +53,7 @@ ] ], "1": [ - "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + "versions.yml:md5,3a4023239bad8505fdf37002915f8a7f" ], "index": [ [ @@ -64,15 +64,15 @@ ] ], "versions": [ - "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + "versions.yml:md5,3a4023239bad8505fdf37002915f8a7f" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.2" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-05-22T18:57:03.058351" + "timestamp": "2024-10-31T11:22:06.537858152" }, "test_gatk4_indexfeaturefile_vcf_gz": { "content": [ @@ -86,7 +86,7 @@ ] ], "1": [ - "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + "versions.yml:md5,3a4023239bad8505fdf37002915f8a7f" ], "index": [ [ @@ -97,15 +97,15 @@ ] ], "versions": [ - "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + "versions.yml:md5,3a4023239bad8505fdf37002915f8a7f" ] } ], "meta": { - 
"nf-test": "0.8.4", - "nextflow": "23.04.2" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-05-22T18:57:51.861697" + "timestamp": "2024-10-31T11:22:37.41693965" }, "test.genome.vcf.idx": { "content": [ @@ -120,13 +120,13 @@ "test_gatk4_indexfeaturefile_bed": { "content": [ [ - "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + "versions.yml:md5,3a4023239bad8505fdf37002915f8a7f" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.2" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-05-22T18:56:46.885162" + "timestamp": "2024-10-31T11:21:52.805375486" } } \ No newline at end of file diff --git a/modules/nf-core/gatk4/indexfeaturefile/tests/tags.yml b/modules/nf-core/gatk4/indexfeaturefile/tests/tags.yml deleted file mode 100644 index 041bd3d..0000000 --- a/modules/nf-core/gatk4/indexfeaturefile/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -gatk4/indexfeaturefile: - - "modules/nf-core/gatk4/indexfeaturefile/**" diff --git a/modules/nf-core/gatk4/intervallisttools/environment.yml b/modules/nf-core/gatk4/intervallisttools/environment.yml index 55993f4..b562b72 100644 --- a/modules/nf-core/gatk4/intervallisttools/environment.yml +++ b/modules/nf-core/gatk4/intervallisttools/environment.yml @@ -1,5 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::gatk4=4.5.0.0 + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/intervallisttools/main.nf b/modules/nf-core/gatk4/intervallisttools/main.nf index 400fa03..bf20d4c 100644 --- a/modules/nf-core/gatk4/intervallisttools/main.nf +++ b/modules/nf-core/gatk4/intervallisttools/main.nf @@ -1,11 +1,11 @@ process GATK4_INTERVALLISTTOOLS { tag "$meta.id" - label 'process_medium' + label 'process_single' conda "${moduleDir}/environment.yml" container 
"${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" input: tuple val(meta), path(intervals) diff --git a/modules/nf-core/gatk4/intervallisttools/tests/main.nf.test.snap b/modules/nf-core/gatk4/intervallisttools/tests/main.nf.test.snap index 7718ed0..3be7aca 100644 --- a/modules/nf-core/gatk4/intervallisttools/tests/main.nf.test.snap +++ b/modules/nf-core/gatk4/intervallisttools/tests/main.nf.test.snap @@ -16,7 +16,7 @@ ] ], "1": [ - "versions.yml:md5,ff682cc9ad70d65a80280df57b316b03" + "versions.yml:md5,a2a074f61ecf1e0a21d1aeefd139125f" ], "interval_list": [ [ @@ -32,15 +32,15 @@ ] ], "versions": [ - "versions.yml:md5,ff682cc9ad70d65a80280df57b316b03" + "versions.yml:md5,a2a074f61ecf1e0a21d1aeefd139125f" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.2" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-05-22T21:26:22.252885" + "timestamp": "2024-10-31T10:37:46.848386632" }, "test_gatk4_intervallisttools -stub": { "content": [ @@ -59,7 +59,7 @@ ] ], "1": [ - "versions.yml:md5,ff682cc9ad70d65a80280df57b316b03" + "versions.yml:md5,a2a074f61ecf1e0a21d1aeefd139125f" ], "interval_list": [ [ @@ -75,14 +75,14 @@ ] ], "versions": [ - "versions.yml:md5,ff682cc9ad70d65a80280df57b316b03" + "versions.yml:md5,a2a074f61ecf1e0a21d1aeefd139125f" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.2" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-05-22T22:15:11.772344" + "timestamp": "2024-10-31T10:38:00.631338482" } } \ No newline at end of file diff --git a/modules/nf-core/gatk4/intervallisttools/tests/tags.yml 
b/modules/nf-core/gatk4/intervallisttools/tests/tags.yml deleted file mode 100644 index bf85ff5..0000000 --- a/modules/nf-core/gatk4/intervallisttools/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -gatk4/intervallisttools: - - "modules/nf-core/gatk4/intervallisttools/**" diff --git a/modules/nf-core/gatk4/mutect2/environment.yml b/modules/nf-core/gatk4/mutect2/environment.yml index 55993f4..b562b72 100644 --- a/modules/nf-core/gatk4/mutect2/environment.yml +++ b/modules/nf-core/gatk4/mutect2/environment.yml @@ -1,5 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::gatk4=4.5.0.0 + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/mutect2/main.nf b/modules/nf-core/gatk4/mutect2/main.nf index 79d8d28..756dfca 100644 --- a/modules/nf-core/gatk4/mutect2/main.nf +++ b/modules/nf-core/gatk4/mutect2/main.nf @@ -1,11 +1,11 @@ process GATK4_MUTECT2 { tag "$meta.id" - label 'process_medium' + label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" input: tuple val(meta), path(input), path(input_index), path(intervals) @@ -62,10 +62,10 @@ process GATK4_MUTECT2 { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.vcf.gz + echo "" | gzip > ${prefix}.vcf.gz touch ${prefix}.vcf.gz.tbi touch ${prefix}.vcf.gz.stats - touch ${prefix}.f1r2.tar.gz + echo "" | gzip > ${prefix}.f1r2.tar.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gatk4/mutect2/tests/f1r2.config b/modules/nf-core/gatk4/mutect2/tests/f1r2.config deleted file mode 100644 index 2d3c8a1..0000000 --- a/modules/nf-core/gatk4/mutect2/tests/f1r2.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = { "--normal-sample $meta.normal_id --f1r2-tar-gz ${meta.id}.f1r2.tar.gz" } -} diff --git a/modules/nf-core/gatk4/mutect2/tests/main.nf.test b/modules/nf-core/gatk4/mutect2/tests/main.nf.test index aea8d22..b0e2214 100644 --- a/modules/nf-core/gatk4/mutect2/tests/main.nf.test +++ b/modules/nf-core/gatk4/mutect2/tests/main.nf.test @@ -8,10 +8,13 @@ nextflow_process { tag "modules_nfcore" tag "gatk4" tag "gatk4/mutect2" + config "./nextflow.config" - test("tumor_normal_pair") { - config "./pair.config" + test("human - bam - tumor_normal_pair") { when { + params { + module_args = "--normal-sample normal" + } process { """ input[0] = [ @@ -55,7 +58,7 @@ nextflow_process { { assert process.success }, { assert snapshot( - process.out.vcf.collect { file(it[1]).getName() }, + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, process.out.tbi.collect { file(it[1]).getName() }, process.out.stats, process.out.f1r2, @@ -66,9 +69,12 @@ nextflow_process { } } - 
test("tumor_normal_pair_f1r2") { - config "./f1r2.config" + test("human - bam - tumor_normal_pair_f1r2") { + when { + params { + module_args = "--normal-sample normal --f1r2-tar-gz test.f1r2.tar.gz" + } process { """ input[0] = [ @@ -111,7 +117,7 @@ nextflow_process { { assert process.success }, { assert snapshot( - process.out.vcf.collect { file(it[1]).getName() }, + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, process.out.tbi.collect { file(it[1]).getName() }, process.out.stats, process.out.f1r2.collect { file(it[1]).getName() }, @@ -121,8 +127,11 @@ nextflow_process { ) } } - test("tumor_single"){ + test("human - bam - tumor_only"){ when { + params { + module_args = '' + } process { """ input[0] = [ @@ -155,7 +164,7 @@ nextflow_process { { assert process.success }, { assert snapshot( - process.out.vcf.collect { file(it[1]).getName() }, + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, process.out.tbi.collect { file(it[1]).getName() }, process.out.stats, process.out.f1r2, @@ -165,8 +174,11 @@ nextflow_process { ) } } - test("cram_input"){ + test("human - cram"){ when { + params { + module_args = '' + } process{ """ input[0] = [ @@ -199,7 +211,7 @@ nextflow_process { { assert process.success }, { assert snapshot( - process.out.vcf.collect { file(it[1]).getName() }, + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, process.out.tbi.collect { file(it[1]).getName() }, process.out.stats, process.out.f1r2, @@ -210,8 +222,11 @@ nextflow_process { } } - test("generate_pon") { + test("human - bam - generate_pon") { when { + params { + module_args = '' + } process { """ input[0] = [ @@ -244,7 +259,7 @@ nextflow_process { { assert process.success }, { assert snapshot( - process.out.vcf.collect { file(it[1]).getName() }, + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, process.out.tbi.collect { file(it[1]).getName() }, process.out.stats, process.out.f1r2, @@ -255,8 +270,11 @@ nextflow_process { } } - test("mitochondria"){ + test("mitochondria - bam"){ 
when { + params { + module_args = "--mitochondria-mode" + } process { """ input[0] = [ @@ -289,7 +307,7 @@ nextflow_process { { assert process.success }, { assert snapshot( - process.out.vcf.collect { file(it[1]).getName() }, + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, process.out.tbi.collect { file(it[1]).getName() }, process.out.stats, process.out.f1r2, @@ -300,9 +318,14 @@ nextflow_process { } } - test("tumor_normal_pair_f1r2_stubs"){ - options "-stub-run" + test("human - bam - tumor_normal_pair_f1r2 - stub"){ + + options "-stub" + when { + params { + module_args = '' + } process { """ input[0] = [ @@ -343,15 +366,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { - assert snapshot( - process.out.vcf.collect { file(it[1]).getName() }, - process.out.tbi.collect { file(it[1]).getName() }, - process.out.stats.collect { file(it[1]).getName() }, - process.out.f1r2.collect { file(it[1]).getName() }, - process.out.versions.collect { file(it[1]).getName() } - ).match() - } + { assert snapshot(process.out).match() } ) } diff --git a/modules/nf-core/gatk4/mutect2/tests/main.nf.test.snap b/modules/nf-core/gatk4/mutect2/tests/main.nf.test.snap index f047af1..80d07bc 100644 --- a/modules/nf-core/gatk4/mutect2/tests/main.nf.test.snap +++ b/modules/nf-core/gatk4/mutect2/tests/main.nf.test.snap @@ -1,33 +1,34 @@ { - "tumor_normal_pair_f1r2_stubs": { + "human - bam - generate_pon": { "content": [ - [ - "test.vcf.gz" - ], + "876aa6be01c0c8fc71ad8e99ed842240", [ "test.vcf.gz.tbi" ], [ - "test.vcf.gz.stats" + [ + { + "id": "test" + }, + "test.vcf.gz.stats:md5,b569ce66bbffe9588b3d221e821023ee" + ] ], [ - "test.f1r2.tar.gz" + ], [ - "h" + "versions.yml:md5,8b99605d0a404d9ccbf8628ca881a8a4" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-03-21T10:14:45.599103891" + "timestamp": "2025-04-08T15:44:32.616294519" }, - "generate_pon": { + "human - cram": { "content": [ - [ - 
"test.vcf.gz" - ], + "1b65f1a163b517944bf2e4b74230e035", [ "test.vcf.gz.tbi" ], @@ -36,27 +37,25 @@ { "id": "test" }, - "test.vcf.gz.stats:md5,b569ce66bbffe9588b3d221e821023ee" + "test.vcf.gz.stats:md5,55ed641e16089afb33cdbc478e202d3d" ] ], [ ], [ - "versions.yml:md5,d94731c50c20569fe9896235a843f382" + "versions.yml:md5,8b99605d0a404d9ccbf8628ca881a8a4" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-03-20T15:57:18.264453766" + "timestamp": "2025-04-08T15:42:27.117032089" }, - "mitochondria": { + "mitochondria - bam": { "content": [ - [ - "test.vcf.gz" - ], + "ea70f79e33805a2c0b47b32a48a8d26f", [ "test.vcf.gz.tbi" ], @@ -65,56 +64,54 @@ { "id": "test" }, - "test.vcf.gz.stats:md5,4f77301a125913170b8e9e7828b4ca3f" + "test.vcf.gz.stats:md5,fc6ea14ca2da346babe78161beea28c9" ] ], [ ], [ - "versions.yml:md5,d94731c50c20569fe9896235a843f382" + "versions.yml:md5,8b99605d0a404d9ccbf8628ca881a8a4" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-03-20T16:05:47.668766905" + "timestamp": "2025-04-08T15:44:46.867520687" }, - "cram_input": { + "human - bam - tumor_normal_pair": { "content": [ - [ - "test.vcf.gz" - ], + "7418ed45a029394253817a5eb7149334", [ "test.vcf.gz.tbi" ], [ [ { - "id": "test" + "id": "test", + "normal_id": "normal", + "tumor_id": "tumour" }, - "test.vcf.gz.stats:md5,55ed641e16089afb33cdbc478e202d3d" + "test.vcf.gz.stats:md5,17d2091015d04cbd4a26b7a67dc659e6" ] ], [ ], [ - "versions.yml:md5,d94731c50c20569fe9896235a843f382" + "versions.yml:md5,8b99605d0a404d9ccbf8628ca881a8a4" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-03-20T15:52:27.894730554" + "timestamp": "2025-04-08T16:02:48.334206829" }, - "tumor_single": { + "human - bam - tumor_only": { "content": [ - [ - "test.vcf.gz" - ], + 
"1b65f1a163b517944bf2e4b74230e035", [ "test.vcf.gz.tbi" ], @@ -130,51 +127,115 @@ ], [ - "versions.yml:md5,d94731c50c20569fe9896235a843f382" + "versions.yml:md5,8b99605d0a404d9ccbf8628ca881a8a4" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-03-20T15:43:28.935723443" + "timestamp": "2025-04-08T15:40:12.534802185" }, - "tumor_normal_pair": { + "human - bam - tumor_normal_pair_f1r2 - stub": { "content": [ - [ - "test.vcf.gz" - ], - [ - "test.vcf.gz.tbi" - ], - [ - [ - { - "id": "test", - "normal_id": "normal", - "tumor_id": "tumour" - }, - "test.vcf.gz.stats:md5,17d2091015d04cbd4a26b7a67dc659e6" + { + "0": [ + [ + { + "id": "test", + "normal_id": "normal", + "tumor_id": "tumour" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "normal_id": "normal", + "tumor_id": "tumour" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "normal_id": "normal", + "tumor_id": "tumour" + }, + "test.vcf.gz.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "normal_id": "normal", + "tumor_id": "tumour" + }, + "test.f1r2.tar.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "4": [ + "versions.yml:md5,8b99605d0a404d9ccbf8628ca881a8a4" + ], + "f1r2": [ + [ + { + "id": "test", + "normal_id": "normal", + "tumor_id": "tumour" + }, + "test.f1r2.tar.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "stats": [ + [ + { + "id": "test", + "normal_id": "normal", + "tumor_id": "tumour" + }, + "test.vcf.gz.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + [ + { + "id": "test", + "normal_id": "normal", + "tumor_id": "tumour" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test", + "normal_id": "normal", + "tumor_id": "tumour" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + 
"versions.yml:md5,8b99605d0a404d9ccbf8628ca881a8a4" ] - ], - [ - - ], - [ - "versions.yml:md5,d94731c50c20569fe9896235a843f382" - ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-03-20T15:31:31.913366311" + "timestamp": "2025-04-08T15:44:59.443047344" }, - "tumor_normal_pair_f1r2": { + "human - bam - tumor_normal_pair_f1r2": { "content": [ - [ - "test.vcf.gz" - ], + "7418ed45a029394253817a5eb7149334", [ "test.vcf.gz.tbi" ], @@ -192,13 +253,13 @@ "test.f1r2.tar.gz" ], [ - "versions.yml:md5,d94731c50c20569fe9896235a843f382" + "versions.yml:md5,8b99605d0a404d9ccbf8628ca881a8a4" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.2" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-03-21T09:45:52.321385704" + "timestamp": "2025-04-08T16:25:12.434872797" } } \ No newline at end of file diff --git a/modules/nf-core/gatk4/mutect2/tests/mito.config b/modules/nf-core/gatk4/mutect2/tests/mito.config deleted file mode 100644 index de61d3e..0000000 --- a/modules/nf-core/gatk4/mutect2/tests/mito.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = { "--mitochondria-mode" } -} diff --git a/modules/nf-core/gatk4/mutect2/tests/nextflow.config b/modules/nf-core/gatk4/mutect2/tests/nextflow.config new file mode 100644 index 0000000..08e9428 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GATK4_MUTECT2 { + ext.args = params.module_args + } +} diff --git a/modules/nf-core/gatk4/mutect2/tests/pair.config b/modules/nf-core/gatk4/mutect2/tests/pair.config deleted file mode 100644 index 2a812b8..0000000 --- a/modules/nf-core/gatk4/mutect2/tests/pair.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = { "--normal-sample $meta.normal_id" } -} diff --git a/modules/nf-core/gatk4/mutect2/tests/tags.yml b/modules/nf-core/gatk4/mutect2/tests/tags.yml deleted file mode 100644 index 4618792..0000000 --- 
a/modules/nf-core/gatk4/mutect2/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -gatk4/mutect2: - - "modules/nf-core/gatk4/mutect2/**" diff --git a/modules/nf-core/gatk4/preprocessintervals/environment.yml b/modules/nf-core/gatk4/preprocessintervals/environment.yml index 55993f4..b562b72 100644 --- a/modules/nf-core/gatk4/preprocessintervals/environment.yml +++ b/modules/nf-core/gatk4/preprocessintervals/environment.yml @@ -1,5 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::gatk4=4.5.0.0 + # renovate: datasource=conda depName=bioconda/gatk4 + - bioconda::gatk4=4.6.1.0 + - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/preprocessintervals/main.nf b/modules/nf-core/gatk4/preprocessintervals/main.nf index dffc4bb..bab6349 100644 --- a/modules/nf-core/gatk4/preprocessintervals/main.nf +++ b/modules/nf-core/gatk4/preprocessintervals/main.nf @@ -1,11 +1,11 @@ process GATK4_PREPROCESSINTERVALS { tag "$fasta" - label 'process_medium' + label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': + 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/gatk4/preprocessintervals/tests/main.nf.test.snap b/modules/nf-core/gatk4/preprocessintervals/tests/main.nf.test.snap index 5365291..1271649 100644 --- a/modules/nf-core/gatk4/preprocessintervals/tests/main.nf.test.snap +++ b/modules/nf-core/gatk4/preprocessintervals/tests/main.nf.test.snap @@ -11,7 +11,7 @@ ] ], "1": [ - "versions.yml:md5,fc3037804d90d3d3424047cfac85d5e4" + "versions.yml:md5,275510c640bbc3050a257bece1e51f85" ], "interval_list": [ [ @@ -22,14 +22,14 @@ ] ], "versions": [ - "versions.yml:md5,fc3037804d90d3d3424047cfac85d5e4" + "versions.yml:md5,275510c640bbc3050a257bece1e51f85" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.1", + "nextflow": "24.10.0" }, - "timestamp": "2024-08-26T11:58:51.314382" + "timestamp": "2024-10-31T11:55:38.222687003" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index a27122c..812fc4c 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,5 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::multiqc=1.27 + - bioconda::multiqc=1.29 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 58d9313..0ac3c36 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.27--pyhdfd78af_0' : - 'biocontainers/multiqc:1.27--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.29--pyhdfd78af_0' : + 'biocontainers/multiqc:1.29--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index 7b7c132..25caea8 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,8f3b8c1cec5388cf2708be948c9fa42f" + "versions.yml:md5,c1fe644a37468f6dae548d98bc72c2c1" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.4" + "nextflow": "25.04.2" }, - "timestamp": "2025-01-27T09:29:57.631982377" + "timestamp": "2025-06-03T09:17:40.895950399" }, "multiqc_stub": { "content": [ @@ -17,25 +17,25 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,8f3b8c1cec5388cf2708be948c9fa42f" + "versions.yml:md5,c1fe644a37468f6dae548d98bc72c2c1" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.4" + "nextflow": "25.04.2" }, - "timestamp": "2025-01-27T09:30:34.743726958" + "timestamp": "2025-06-03T09:18:16.875131107" }, "multiqc_versions_config": { "content": [ [ - "versions.yml:md5,8f3b8c1cec5388cf2708be948c9fa42f" + "versions.yml:md5,c1fe644a37468f6dae548d98bc72c2c1" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.4" + "nextflow": "25.04.2" }, - "timestamp": "2025-01-27T09:30:21.44383553" + "timestamp": "2025-06-03T09:18:03.624717769" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml deleted file mode 100644 index bea6c0d..0000000 --- a/modules/nf-core/multiqc/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -multiqc: - - 
modules/nf-core/multiqc/** diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index 28c0a81..6de0095 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -10,9 +10,11 @@ process SAMTOOLS_FAIDX { input: tuple val(meta), path(fasta) tuple val(meta2), path(fai) + val get_sizes output: - tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true + tuple val(meta), path ("*.{fa,fasta}") , emit: fa, optional: true + tuple val(meta), path ("*.sizes") , emit: sizes, optional: true tuple val(meta), path ("*.fai") , emit: fai, optional: true tuple val(meta), path ("*.gzi") , emit: gzi, optional: true path "versions.yml" , emit: versions @@ -22,12 +24,15 @@ process SAMTOOLS_FAIDX { script: def args = task.ext.args ?: '' + def get_sizes_command = get_sizes ? "cut -f 1,2 ${fasta}.fai > ${fasta}.sizes" : '' """ samtools \\ faidx \\ $fasta \\ $args + ${get_sizes_command} + cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') @@ -37,9 +42,15 @@ process SAMTOOLS_FAIDX { stub: def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() def fastacmd = match[0] ? "touch ${match[0][1]}" : '' + def get_sizes_command = get_sizes ? 
"touch ${fasta}.sizes" : '' """ ${fastacmd} touch ${fasta}.fai + if [[ "${fasta.extension}" == "gz" ]]; then + touch ${fasta}.gzi + fi + + ${get_sizes_command} cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index 6721b2c..256a330 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -1,9 +1,10 @@ name: samtools_faidx -description: Index FASTA file +description: Index FASTA file, and optionally generate a file of chromosome sizes keywords: - index - fasta - faidx + - chromosome tools: - samtools: description: | @@ -34,6 +35,10 @@ input: type: file description: FASTA index file pattern: "*.{fai}" + - - get_sizes: + type: boolean + description: use cut to get the sizes of the index (true) or not (false) + output: - fa: - meta: @@ -55,6 +60,16 @@ output: type: file description: FASTA index file pattern: "*.{fai}" + - sizes: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.sizes": + type: file + description: File containing chromosome lengths + pattern: "*.{sizes}" - gzi: - meta: type: map @@ -75,6 +90,5 @@ authors: - "@ewels" - "@phue" maintainers: - - "@drpatelh" - - "@ewels" + - "@maxulysse" - "@phue" diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test b/modules/nf-core/samtools/faidx/tests/main.nf.test index 17244ef..64219b7 100644 --- a/modules/nf-core/samtools/faidx/tests/main.nf.test +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test @@ -16,8 +16,8 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:false ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [[],[]] + input[2] = false """ } } @@ -37,8 +37,8 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:false ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)] - input[1] = [[],[]] + input[2] = false """ } } @@ -60,9 +60,9 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:false ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [ [ id:'test', single_end:false ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + input[2] = false """ } } @@ -84,9 +84,9 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:false ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [ [ id:'test', single_end:false ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + input[2] = false """ } } @@ -106,8 +106,8 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:false ], // meta map file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = [[],[]] + input[2] = false """ } } @@ -119,4 +119,101 @@ nextflow_process { ) } } + + test("test_samtools_faidx_get_sizes") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[1] = [[],[]] + input[2] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_get_sizes_bgzip") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ]) + input[1] = [[],[]] + input[2] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_get_sizes - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[1] = [[],[]] + input[2] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_get_sizes_bgzip - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ]) + input[1] = [[],[]] + input[2] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + } \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap index 
1bbb3ec..7372241 100644 --- a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -6,6 +6,9 @@ ], "1": [ + + ], + "2": [ [ { "id": "test", @@ -14,10 +17,10 @@ "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" ] ], - "2": [ + "3": [ ], - "3": [ + "4": [ "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" ], "fa": [ @@ -34,6 +37,9 @@ ], "gzi": [ + ], + "sizes": [ + ], "versions": [ "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" @@ -41,10 +47,142 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-20T17:31:48.258623157" + }, + "test_samtools_faidx_get_sizes_bgzip - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,e3e4ba35a02020d173be8d1ee04eaebf" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gzi": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e3e4ba35a02020d173be8d1ee04eaebf" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-09-16T07:57:47.450887871" + "timestamp": "2024-11-20T17:32:41.122428188" + }, + "test_samtools_faidx_get_sizes": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "2": [ + [ + { + "id": "test" + }, + 
"genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-20T17:34:02.353546697" }, "test_samtools_faidx_bgzip": { "content": [ @@ -53,6 +191,9 @@ ], "1": [ + + ], + "2": [ [ { "id": "test", @@ -61,7 +202,7 @@ "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" ] ], - "2": [ + "3": [ [ { "id": "test", @@ -70,7 +211,7 @@ "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" ] ], - "3": [ + "4": [ "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" ], "fa": [ @@ -93,6 +234,9 @@ }, "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" ] + ], + "sizes": [ + ], "versions": [ "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" @@ -100,10 +244,10 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-09-16T07:58:04.804905659" + "timestamp": "2024-11-20T17:31:55.157487176" }, "test_samtools_faidx_fasta": { "content": [ @@ -124,6 +268,9 @@ ], "3": [ + + ], + "4": [ "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" ], "fa": [ @@ -140,6 +287,9 @@ ], "gzi": [ + ], + "sizes": [ + ], "versions": [ "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" @@ -147,10 +297,71 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-09-16T07:58:23.831268154" + "timestamp": "2024-11-20T17:32:02.149455586" + }, + "test_samtools_faidx_get_sizes - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": 
"test" + }, + "genome.fasta.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,e3e4ba35a02020d173be8d1ee04eaebf" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gzi": [ + + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e3e4ba35a02020d173be8d1ee04eaebf" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-20T17:32:34.29376776" }, "test_samtools_faidx_stub_fasta": { "content": [ @@ -171,6 +382,9 @@ ], "3": [ + + ], + "4": [ "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" ], "fa": [ @@ -187,6 +401,9 @@ ], "gzi": [ + ], + "sizes": [ + ], "versions": [ "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" @@ -194,10 +411,10 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-09-16T07:58:35.600243706" + "timestamp": "2024-11-20T17:32:09.125065185" }, "test_samtools_faidx_stub_fai": { "content": [ @@ -206,6 +423,9 @@ ], "1": [ + + ], + "2": [ [ { "id": "test", @@ -214,10 +434,10 @@ "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" ] ], - "2": [ + "3": [ ], - "3": [ + "4": [ "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" ], "fa": [ @@ -234,6 +454,80 @@ ], "gzi": [ + ], + "sizes": [ + + ], + "versions": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-20T17:32:16.274287863" + }, + "test_samtools_faidx_get_sizes_bgzip": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "2": [ + [ + { + "id": "test" + }, 
+ "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "4": [ + "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] ], "versions": [ "versions.yml:md5,6bbe80a2e14bd61202ca63e12d66027f" @@ -241,9 +535,9 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.1" }, - "timestamp": "2024-09-16T07:58:54.705460167" + "timestamp": "2024-11-20T17:32:28.117654855" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/tags.yml b/modules/nf-core/samtools/faidx/tests/tags.yml deleted file mode 100644 index e4a8394..0000000 --- a/modules/nf-core/samtools/faidx/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -samtools/faidx: - - modules/nf-core/samtools/faidx/** diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml deleted file mode 100644 index e0f58a7..0000000 --- a/modules/nf-core/samtools/index/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -samtools/index: - - modules/nf-core/samtools/index/** diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 41aa104..a40da87 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -38,7 +38,7 @@ workflow PREPARE_GENOME { .groupTuple() .filter { _meta, files -> !files[1] } - SAMTOOLS_FAIDX(fasta_for_fai, [[:], []]) + SAMTOOLS_FAIDX(fasta_for_fai, [[:], []], false) fai = user_fai.mix(SAMTOOLS_FAIDX.out.fai).collect() diff --git 
a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/tags.yml b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/tags.yml deleted file mode 100644 index bb1b93c..0000000 --- a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/bam_create_som_pon_gatk: - - subworkflows/nf-core/bam_create_som_pon_gatk/** diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 76e1fd5..6be8d6e 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -13,6 +13,7 @@ [ "multiqc", "multiqc/multiqc_data", + "multiqc/multiqc_data/BETA-multiqc.parquet", "multiqc/multiqc_data/multiqc.log", "multiqc/multiqc_data/multiqc_citations.txt", "multiqc/multiqc_data/multiqc_data.json", @@ -37,6 +38,7 @@ "samtools/sample4.bam" ], [ + "BETA-multiqc.parquet:md5,700aa7f7497a3ebc9fe444a93ee0bd65", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "panel.cnn:md5,1443acdb3bb430b0c144ec100ef8a514", "sample3.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", @@ -53,8 +55,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nextflow": "25.04.2" }, - "timestamp": "2025-04-09T14:47:24.266861802" + "timestamp": "2025-06-05T08:59:59.283280871" } } \ No newline at end of file diff --git a/tests/gens_pon.nf.test.snap b/tests/gens_pon.nf.test.snap index 2803a69..112f634 100644 --- a/tests/gens_pon.nf.test.snap +++ b/tests/gens_pon.nf.test.snap @@ -4,13 +4,13 @@ 5, { "GATK4_COLLECTREADCOUNTS": { - "gatk4": "4.5.0.0" + "gatk4": "4.6.1.0" }, "GATK4_CREATEREADCOUNTPANELOFNORMALS": { - "gatk4": "4.5.0.0" + "gatk4": "4.6.1.0" }, "GATK4_PREPROCESSINTERVALS_GENS": { - "gatk4": "4.5.0.0" + "gatk4": "4.6.1.0" }, "Workflow": { "nf-core/createpanelrefs": "v1.0dev" @@ -25,6 +25,7 @@ "gens_pon/readcounts/sample2.tsv", "multiqc", "multiqc/multiqc_data", + "multiqc/multiqc_data/BETA-multiqc.parquet", "multiqc/multiqc_data/multiqc.log", "multiqc/multiqc_data/multiqc_citations.txt", 
"multiqc/multiqc_data/multiqc_data.json", @@ -41,13 +42,14 @@ [ "sample1.tsv:md5,bfceab35109b04317f38f9cc1cde71ca", "sample2.tsv:md5,7141d08cdc26f6057557be9e23ef4365", + "BETA-multiqc.parquet:md5,b0e6858f997bcf27b7d7cdff6b3ae49d", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nextflow": "25.04.2" }, - "timestamp": "2025-04-14T18:36:47.335786578" + "timestamp": "2025-06-05T09:06:19.856807946" } } \ No newline at end of file From 781da8430d140cecd1210be5483592bdc9b72ec7 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 12 Jun 2025 11:06:38 +0200 Subject: [PATCH 196/234] fix rocrate --- ro-crate-metadata.json | 72 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 14 deletions(-) diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 2494148..fd57e6c 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -22,8 +22,8 @@ "@id": "./", "@type": "Dataset", "creativeWorkStatus": "InProgress", - "datePublished": "2025-06-03T11:01:15+00:00", - "description": "

\n \n \n \"nf-core/createpanelrefs\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.04.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/createpanelrefs)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/createpanelrefs** is a bioinformatics pipeline that ...\n\n\n\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\n\nNow, you can run the pipeline using:\n\n\n\n```bash\nnextflow run nf-core/createpanelrefs \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/createpanelrefs/usage) and the [parameter documentation](https://nf-co.re/createpanelrefs/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/createpanelrefs/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/createpanelrefs/output).\n\n## Credits\n\nnf-core/createpanelrefs was originally written by @maxulysse.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#createpanelrefs` channel](https://nfcore.slack.com/channels/createpanelrefs) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "datePublished": "2025-06-12T09:06:27+00:00", + "description": "

\n \n \n \"nf-core/createpanelrefs\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.04.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/createpanelrefs)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/createpanelrefs** is a bioinformatics helper pipeline that will help in creating panel of normals and other models.\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n2. Build Panel of Normals for [`CNVKIT`](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004873)\n3. Build ploidy and cnv calling models for [`GATK's germlinecnvcaller workflow`](https://genome.cshlp.org/content/20/9/1297)\n4. Build Panel of Normals for [`GENS`](https://github.com/Clinical-Genomics-Lund/gens)\n5. Build Panel of Normals for [`Mutect2`](https://genome.cshlp.org/content/20/9/1297)\n6. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,bam,bai,cram,crai\nsample1,sample1.bam,sample1.bai,,\nsample2,sample2.bam,,,\nsample3,sample3.bam,sample3.bai,,\nsample4,sample4.bam,,,\n```\n\nEach row in the samplesheet represents an alignment file, and it is important that you provide the files in the right format for the analysis you want to run.\n\n| Tool | Alignment format |\n| ----------------- | ---------------------------- |\n| cnvkit | bam |\n| germlinecnvcaller | bam or cram or a mix of both |\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/createpanelrefs \\\n -profile \\\n --input samplesheet.csv \\\n --tools \\\n --genome GATK.GRCh38 \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/createpanelrefs/usage) and the [parameter documentation](https://nf-co.re/createpanelrefs/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/createpanelrefs/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/createpanelrefs/output).\n\n## Credits\n\nnf-core/createpanelrefs was originally written by @maxulysse.\n@marrip contributed in the idea that started it all.\n@matthdsm and @FriederikeHanssen contributed in the actual design.\n@ramprasadn's interest was the final push that led to the creation.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @jfy133\n- @JoseEspinosa\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#createpanelrefs` channel](https://nfcore.slack.com/channels/createpanelrefs) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven 
Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -99,7 +99,7 @@ }, "mentions": [ { - "@id": "#3adc6bc9-5659-4ed2-aff7-659417321aab" + "@id": "#f19e6f5f-6b08-4b64-a367-07ff4a6591c8" } ], "name": "nf-core/createpanelrefs" @@ -121,21 +121,53 @@ }, { "@id": "main.nf", - "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + "@type": [ + "File", + "SoftwareSourceCode", + "ComputationalWorkflow" + ], + "creator": [ + { + "@id": "https://orcid.org/0000-0001-7313-3734" + }, + { + "@id": "#max.u.garcia@gmail.com" + } + ], "dateCreated": "", - "dateModified": "2025-06-03T11:01:15Z", + "dateModified": "2025-06-12T11:06:27Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", - "keywords": ["nf-core", "nextflow"], - "license": ["MIT"], - "name": ["nf-core/createpanelrefs"], + "keywords": [ + "nf-core", + "nextflow" + ], + "license": [ + "MIT" + ], + "maintainer": [ + { + "@id": "https://orcid.org/0000-0001-7313-3734" + }, + { + "@id": "#max.u.garcia@gmail.com" + } + ], + "name": [ + "nf-core/createpanelrefs" + ], "programmingLanguage": { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" }, "sdPublisher": { "@id": "https://nf-co.re/" }, - "url": ["https://github.com/nf-core/createpanelrefs", "https://nf-co.re/createpanelrefs/dev/"], - "version": ["1.0dev"] + "url": [ + "https://github.com/nf-core/createpanelrefs", + "https://nf-co.re/createpanelrefs/dev/" + ], + "version": [ + "1.0dev" + ] }, { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", @@ -150,11 +182,11 @@ "version": "!>=24.04.2" }, { - "@id": "#3adc6bc9-5659-4ed2-aff7-659417321aab", + "@id": "#f19e6f5f-6b08-4b64-a367-07ff4a6591c8", "@type": "TestSuite", "instance": [ { - "@id": "#0f1c2baa-1425-40cc-9f1d-c2c88cf08e1b" + "@id": "#4593c739-44a0-4fc1-8a70-a445dbf90a69" } ], "mainEntity": { @@ -163,7 +195,7 @@ "name": "Test suite 
for nf-core/createpanelrefs" }, { - "@id": "#0f1c2baa-1425-40cc-9f1d-c2c88cf08e1b", + "@id": "#4593c739-44a0-4fc1-8a70-a445dbf90a69", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/createpanelrefs", "resource": "repos/nf-core/createpanelrefs/actions/workflows/nf-test.yml", @@ -290,6 +322,18 @@ "@type": "Organization", "name": "nf-core", "url": "https://nf-co.re/" + }, + { + "@id": "https://orcid.org/0000-0001-7313-3734", + "@type": "Person", + "email": "20065894+ramprasadn@users.noreply.github.com", + "name": "Ramprasad Neethiraj" + }, + { + "@id": "#max.u.garcia@gmail.com", + "@type": "Person", + "email": "max.u.garcia@gmail.com", + "name": "Maxime U Garcia" } ] -} +} \ No newline at end of file From 8b7158e9a0aada978df157eda9faa34afcddf977 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 12 Jun 2025 12:26:49 +0200 Subject: [PATCH 197/234] fix name of versions file, update to nft-utils 0.0.4 and regenerate snapshots --- main.nf | 2 +- nf-test.config | 2 +- tests/default.nf.test | 4 +- tests/default.nf.test.snap | 11 ++--- tests/gens_pon.nf.test | 4 +- tests/gens_pon.nf.test.snap | 11 ++--- tests/germlinecnvcaller_cohort.nf.test | 4 +- tests/germlinecnvcaller_cohort.nf.test.snap | 55 +++++++++++---------- tests/mutect2.nf.test | 8 +-- tests/mutect2.nf.test.snap | 34 ++++++------- 10 files changed, 65 insertions(+), 70 deletions(-) diff --git a/main.nf b/main.nf index b423be2..e8579b5 100644 --- a/main.nf +++ b/main.nf @@ -157,7 +157,7 @@ workflow { // Collate and save software versions collated_versions = softwareVersionsToYAML(versions).collectFile( storeDir: "${params.outdir}/pipeline_info", - name: 'nf_core_ceatepanelrefs_software_mqc_versions.yml', + name: 'nf_core_createpanelrefs_software_mqc_versions.yml', sort: true, newLine: true, ) diff --git a/nf-test.config b/nf-test.config index 4a258d9..80fcaad 100644 --- a/nf-test.config +++ b/nf-test.config @@ -20,6 +20,6 @@ config { // load the necessary plugins plugins { load 
"nft-bam@0.4.0" - load "nft-utils@0.0.3" + load "nft-utils@0.0.4" } } diff --git a/tests/default.nf.test b/tests/default.nf.test index 64e2159..fd3d35e 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -24,8 +24,8 @@ nextflow_pipeline { { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), - // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml"), + // pipeline versions.yml file for multiqc from which Nextflow and pipeline versions are removed (all from the workflow key) + removeFromYamlMap("${outputDir}/pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", "Workflow"), // All stable path name, with a relative path stable_name, // All files with stable contents diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 6be8d6e..b5eeb64 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -5,9 +5,6 @@ { "CNVKIT_BATCH": { "cnvkit": "0.9.10" - }, - "Workflow": { - "nf-core/createpanelrefs": "v1.0dev" } }, [ @@ -21,7 +18,7 @@ "multiqc/multiqc_data/multiqc_sources.txt", "multiqc/multiqc_report.html", "pipeline_info", - "pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml", + "pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", "reference", "reference/cnvkit", "reference/cnvkit/panel.cnn", @@ -38,7 +35,7 @@ "samtools/sample4.bam" ], [ - "BETA-multiqc.parquet:md5,700aa7f7497a3ebc9fe444a93ee0bd65", + "BETA-multiqc.parquet:md5,43859f31ac62a102622f7c50461e15eb", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "panel.cnn:md5,1443acdb3bb430b0c144ec100ef8a514", "sample3.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", @@ -55,8 +52,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.04.2" + "nextflow": "25.04.3" }, - "timestamp": 
"2025-06-05T08:59:59.283280871" + "timestamp": "2025-06-12T12:04:39.677550189" } } \ No newline at end of file diff --git a/tests/gens_pon.nf.test b/tests/gens_pon.nf.test index 417fa67..471d014 100644 --- a/tests/gens_pon.nf.test +++ b/tests/gens_pon.nf.test @@ -31,8 +31,8 @@ nextflow_pipeline { { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), - // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml"), + // pipeline versions.yml file for multiqc from which Nextflow and pipeline versions are removed (all from the workflow key) + removeFromYamlMap("${outputDir}/pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", "Workflow"), // All stable path name, with a relative path stable_name, // All files with stable contents diff --git a/tests/gens_pon.nf.test.snap b/tests/gens_pon.nf.test.snap index 112f634..634bdc8 100644 --- a/tests/gens_pon.nf.test.snap +++ b/tests/gens_pon.nf.test.snap @@ -11,9 +11,6 @@ }, "GATK4_PREPROCESSINTERVALS_GENS": { "gatk4": "4.6.1.0" - }, - "Workflow": { - "nf-core/createpanelrefs": "v1.0dev" } }, [ @@ -33,7 +30,7 @@ "multiqc/multiqc_data/multiqc_sources.txt", "multiqc/multiqc_report.html", "pipeline_info", - "pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml", + "pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", "references", "references/intervals", "references/intervals/gens_pon", @@ -42,14 +39,14 @@ [ "sample1.tsv:md5,bfceab35109b04317f38f9cc1cde71ca", "sample2.tsv:md5,7141d08cdc26f6057557be9e23ef4365", - "BETA-multiqc.parquet:md5,b0e6858f997bcf27b7d7cdff6b3ae49d", + "BETA-multiqc.parquet:md5,2951d99beb76fdf5c8f3c058a2508669", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.04.2" + "nextflow": "25.04.3" }, - "timestamp": 
"2025-06-05T09:06:19.856807946" + "timestamp": "2025-06-12T12:08:33.46752653" } } \ No newline at end of file diff --git a/tests/germlinecnvcaller_cohort.nf.test b/tests/germlinecnvcaller_cohort.nf.test index 99ad00c..55b9006 100644 --- a/tests/germlinecnvcaller_cohort.nf.test +++ b/tests/germlinecnvcaller_cohort.nf.test @@ -29,8 +29,8 @@ nextflow_pipeline { { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), - // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml"), + // pipeline versions.yml file for multiqc from which Nextflow and pipeline versions are removed (all from the workflow key) + removeFromYamlMap("${outputDir}/pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", "Workflow"), // All stable path name, with a relative path stable_name, // All files with stable contents diff --git a/tests/germlinecnvcaller_cohort.nf.test.snap b/tests/germlinecnvcaller_cohort.nf.test.snap index c24d518..0deade5 100644 --- a/tests/germlinecnvcaller_cohort.nf.test.snap +++ b/tests/germlinecnvcaller_cohort.nf.test.snap @@ -4,28 +4,25 @@ 10, { "GATK4_ANNOTATEINTERVALS": { - "gatk4": "4.5.0.0" + "gatk4": "4.6.1.0" }, "GATK4_COLLECTREADCOUNTS": { - "gatk4": "4.5.0.0" + "gatk4": "4.6.1.0" }, "GATK4_DETERMINEGERMLINECONTIGPLOIDY": { - "gatk4": "4.5.0.0" + "gatk4": "4.6.1.0" }, "GATK4_FILTERINTERVALS": { - "gatk4": "4.5.0.0" + "gatk4": "4.6.1.0" }, "GATK4_GERMLINECNVCALLER": { - "gatk4": "4.5.0.0" + "gatk4": "4.6.1.0" }, "GATK4_INTERVALLISTTOOLS": { - "gatk4": "4.5.0.0" + "gatk4": "4.6.1.0" }, "GATK4_PREPROCESSINTERVALS": { - "gatk4": "4.5.0.0" - }, - "Workflow": { - "nf-core/createpanelrefs": "v1.0dev" + "gatk4": "4.6.1.0" } }, [ @@ -35,10 +32,10 @@ "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/contig_ploidy_prior.tsv", 
"germlinecnvcaller/determinegermlinecontigploidy/cohort-model/gcnvkernel_version.json", "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/interval_list.tsv", - "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/mu_mean_bias_j_lowerbound__.tsv", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/mu_mean_bias_j_interval__.tsv", "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/mu_psi_j_log__.tsv", "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/ploidy_config.json", - "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/std_mean_bias_j_lowerbound__.tsv", + "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/std_mean_bias_j_interval__.tsv", "germlinecnvcaller/determinegermlinecontigploidy/cohort-model/std_psi_j_log__.tsv", "germlinecnvcaller/germlinecnvcaller", "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model", @@ -80,11 +77,11 @@ "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/interval_list.tsv", "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/log_q_tau_tk.tsv", "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/mu_W_tu.tsv", - "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/mu_ard_u_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/mu_ard_u_interval__.tsv", "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/mu_log_mean_bias_t.tsv", "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/mu_psi_t_log__.tsv", "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/std_W_tu.tsv", - "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/std_ard_u_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/std_ard_u_interval__.tsv", "germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/std_log_mean_bias_t.tsv", 
"germlinecnvcaller/germlinecnvcaller/1scattered-cnv-model/1scattered-model/std_psi_t_log__.tsv", "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model", @@ -126,11 +123,11 @@ "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/interval_list.tsv", "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/log_q_tau_tk.tsv", "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/mu_W_tu.tsv", - "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/mu_ard_u_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/mu_ard_u_interval__.tsv", "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/mu_log_mean_bias_t.tsv", "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/mu_psi_t_log__.tsv", "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/std_W_tu.tsv", - "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/std_ard_u_log__.tsv", + "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/std_ard_u_interval__.tsv", "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/std_log_mean_bias_t.tsv", "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/std_psi_t_log__.tsv", "germlinecnvcaller/readcounts", @@ -138,6 +135,7 @@ "germlinecnvcaller/readcounts/sample2.hdf5", "multiqc", "multiqc/multiqc_data", + "multiqc/multiqc_data/BETA-multiqc.parquet", "multiqc/multiqc_data/multiqc.log", "multiqc/multiqc_data/multiqc_citations.txt", "multiqc/multiqc_data/multiqc_data.json", @@ -145,38 +143,43 @@ "multiqc/multiqc_data/multiqc_sources.txt", "multiqc/multiqc_report.html", "pipeline_info", - "pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml" + "pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml" ], [ "contig_ploidy_prior.tsv:md5,7a2f5444b09a1f635a540bbcd23176cf", - 
"gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", + "gcnvkernel_version.json:md5,aa0ad91e0ff23be94e09e909cf2286d8", "interval_list.tsv:md5,8c5aaf57cf34ff35b183178a87a9f864", - "mu_mean_bias_j_lowerbound__.tsv:md5,76bfcb151ebf3c97cf9e3499891eb652", + "mu_mean_bias_j_interval__.tsv:md5,27ae705b7a002517873357ba7ecde39f", "ploidy_config.json:md5,4cdc16109826fa7d3cdfd1dc8758ec27", - "std_mean_bias_j_lowerbound__.tsv:md5,9b31e5eecfd8ec379bc7ff126d599ab9", + "std_mean_bias_j_interval__.tsv:md5,0816459105b443e6f266e725f298f0b9", "calling_config.json:md5,dc78889e3d0ba1adf02d10db0194128e", "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", - "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", + "gcnvkernel_version.json:md5,aa0ad91e0ff23be94e09e909cf2286d8", "interval_list.tsv:md5,0cba6efc5fa465f7930c901916a3e764", "calling_config.json:md5,dc78889e3d0ba1adf02d10db0194128e", "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", - "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", + "gcnvkernel_version.json:md5,aa0ad91e0ff23be94e09e909cf2286d8", "interval_list.tsv:md5,0cba6efc5fa465f7930c901916a3e764", + "mu_ard_u_interval__.tsv:md5,1666252e672c760c1148dc20c62635ad", + "std_ard_u_interval__.tsv:md5,f6a3ec95fa4a2cc8e3d155f7e12f43a1", "calling_config.json:md5,dc78889e3d0ba1adf02d10db0194128e", "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", - "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", + "gcnvkernel_version.json:md5,aa0ad91e0ff23be94e09e909cf2286d8", "interval_list.tsv:md5,9f7718372e87e69aeb837678755a9971", "calling_config.json:md5,dc78889e3d0ba1adf02d10db0194128e", "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", - "gcnvkernel_version.json:md5,273ffad942b47cacbc56d29f77ccf2a6", + "gcnvkernel_version.json:md5,aa0ad91e0ff23be94e09e909cf2286d8", "interval_list.tsv:md5,9f7718372e87e69aeb837678755a9971", + "mu_ard_u_interval__.tsv:md5,f29ca1587a34a758a291eb9905057d54", + 
"std_ard_u_interval__.tsv:md5,6989c174d280ca75ee537a0947e469e9", + "BETA-multiqc.parquet:md5,ce459aa978b3b8518a3abc2339bd7f69", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nextflow": "25.04.3" }, - "timestamp": "2025-04-11T19:42:40.668405904" + "timestamp": "2025-06-12T12:17:30.06451273" } } \ No newline at end of file diff --git a/tests/mutect2.nf.test b/tests/mutect2.nf.test index 5ca7b77..744e112 100644 --- a/tests/mutect2.nf.test +++ b/tests/mutect2.nf.test @@ -24,8 +24,8 @@ nextflow_pipeline { { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), - // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml"), + // pipeline versions.yml file for multiqc from which Nextflow and pipeline versions are removed (all from the workflow key) + removeFromYamlMap("${outputDir}/pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", "Workflow"), // All stable path name, with a relative path stable_name, // All files with stable contents @@ -57,8 +57,8 @@ nextflow_pipeline { { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), - // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml"), + // pipeline versions.yml file for multiqc from which Nextflow and pipeline versions are removed (all from the workflow key) + removeFromYamlMap("${outputDir}/pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", "Workflow"), // All stable path name, with a relative path stable_name, // All files with stable contents diff --git a/tests/mutect2.nf.test.snap 
b/tests/mutect2.nf.test.snap index 89add30..10322a6 100644 --- a/tests/mutect2.nf.test.snap +++ b/tests/mutect2.nf.test.snap @@ -7,16 +7,13 @@ "gawk": "5.3.0" }, "GATK4_CREATESOMATICPANELOFNORMALS": { - "gatk4": "4.5.0.0" + "gatk4": "4.6.1.0" }, "GATK4_GENOMICSDBIMPORT": { - "gatk4": "4.5.0.0" + "gatk4": "4.6.1.0" }, "GATK4_MUTECT2": { - "gatk4": "4.5.0.0" - }, - "Workflow": { - "nf-core/createpanelrefs": "v1.0dev" + "gatk4": "4.6.1.0" } }, [ @@ -40,6 +37,7 @@ "intervals/mutect2_target_bed/genome.bed", "multiqc", "multiqc/multiqc_data", + "multiqc/multiqc_data/BETA-multiqc.parquet", "multiqc/multiqc_data/multiqc.log", "multiqc/multiqc_data/multiqc_citations.txt", "multiqc/multiqc_data/multiqc_data.json", @@ -47,36 +45,34 @@ "multiqc/multiqc_data/multiqc_sources.txt", "multiqc/multiqc_report.html", "pipeline_info", - "pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml" + "pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml" ], [ "sample3.vcf.gz.stats:md5,a05ace4138fc5cb993ed912d654ec22d", "sample4.vcf.gz.stats:md5,080e6d0e254e582dfb9d5916c9637391", "genome.bed:md5,472d213cfcde96565699779d5bfc0e32", + "BETA-multiqc.parquet:md5,7483f85c57b7d9e17ec1b2ed42f684b9", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nextflow": "25.04.3" }, - "timestamp": "2025-04-16T11:05:19.08294075" + "timestamp": "2025-06-12T12:26:09.554931606" }, "-profile test --tools mutect2 --mutect2_pon_name test": { "content": [ 5, { "GATK4_CREATESOMATICPANELOFNORMALS": { - "gatk4": "4.5.0.0" + "gatk4": "4.6.1.0" }, "GATK4_GENOMICSDBIMPORT": { - "gatk4": "4.5.0.0" + "gatk4": "4.6.1.0" }, "GATK4_MUTECT2": { - "gatk4": "4.5.0.0" - }, - "Workflow": { - "nf-core/createpanelrefs": "v1.0dev" + "gatk4": "4.6.1.0" } }, [ @@ -99,6 +95,7 @@ "gatk4/test/vidmap.json", "multiqc", "multiqc/multiqc_data", + "multiqc/multiqc_data/BETA-multiqc.parquet", "multiqc/multiqc_data/multiqc.log", 
"multiqc/multiqc_data/multiqc_citations.txt", "multiqc/multiqc_data/multiqc_data.json", @@ -106,18 +103,19 @@ "multiqc/multiqc_data/multiqc_sources.txt", "multiqc/multiqc_report.html", "pipeline_info", - "pipeline_info/nf_core_ceatepanelrefs_software_mqc_versions.yml" + "pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml" ], [ "sample1.vcf.gz.stats:md5,b569ce66bbffe9588b3d221e821023ee", "sample2.vcf.gz.stats:md5,76f749c53212d72e98801f6030fbf8a6", + "BETA-multiqc.parquet:md5,d5f02cd6ec31ddcd621d85446f5885db", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nextflow": "25.04.3" }, - "timestamp": "2025-04-09T15:07:42.422460888" + "timestamp": "2025-06-12T12:22:15.173878756" } } \ No newline at end of file From fa7bcde6b63deef0817305db9085342973e33799 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 12 Jun 2025 12:28:43 +0200 Subject: [PATCH 198/234] update CHANGELOG --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ab03a5..99f56cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,11 +32,14 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#50](https://github.com/nf-core/createpanelrefs/pull/50) - Improve references related files handling - [#50](https://github.com/nf-core/createpanelrefs/pull/50) - Heavy refactoring of the pipeline - [#52](https://github.com/nf-core/createpanelrefs/pull/52) - Template update for nf-core/tools v3.2.1 +- [#54](https://github.com/nf-core/createpanelrefs/pull/54) - Template update for nf-core/tools v3.3.1 +- [#54](https://github.com/nf-core/createpanelrefs/pull/54) - Update nft-utils to 0.0.4 ### `Fixed` - [#50](https://github.com/nf-core/createpanelrefs/pull/50) - Fix mutect2 that wasn't working without a bed file - [#53](https://github.com/nf-core/createpanelrefs/pull/53) - Minor syntax fixes due to 
[#50](https://github.com/nf-core/createpanelrefs/pull/50) +- [#54](https://github.com/nf-core/createpanelrefs/pull/54) - Fix name for `_mqc_versions.yml` file ### `Dependencies` From c12d9dad16d5c8109b77450abd5a9a7827761afe Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 12 Jun 2025 12:31:12 +0200 Subject: [PATCH 199/234] remove multiqc beta parquet files from snapshots --- tests/.nftignore | 1 + tests/default.nf.test.snap | 1 - tests/gens_pon.nf.test.snap | 1 - tests/germlinecnvcaller_cohort.nf.test.snap | 1 - tests/mutect2.nf.test.snap | 2 -- 5 files changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/.nftignore b/tests/.nftignore index 954215f..ca92657 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -28,6 +28,7 @@ germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-mode germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/std_log_mean_bias_t.tsv germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/std_psi_t_log__.tsv germlinecnvcaller/readcounts/*.hdf5 +multiqc/multiqc_data/BETA-multiqc.parquet multiqc/multiqc_data/multiqc.log multiqc/multiqc_data/multiqc_data.json multiqc/multiqc_data/multiqc_general_stats.txt diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index b5eeb64..c34e8f4 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -35,7 +35,6 @@ "samtools/sample4.bam" ], [ - "BETA-multiqc.parquet:md5,43859f31ac62a102622f7c50461e15eb", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", "panel.cnn:md5,1443acdb3bb430b0c144ec100ef8a514", "sample3.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", diff --git a/tests/gens_pon.nf.test.snap b/tests/gens_pon.nf.test.snap index 634bdc8..e7555bd 100644 --- a/tests/gens_pon.nf.test.snap +++ b/tests/gens_pon.nf.test.snap @@ -39,7 +39,6 @@ [ "sample1.tsv:md5,bfceab35109b04317f38f9cc1cde71ca", "sample2.tsv:md5,7141d08cdc26f6057557be9e23ef4365", - 
"BETA-multiqc.parquet:md5,2951d99beb76fdf5c8f3c058a2508669", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], diff --git a/tests/germlinecnvcaller_cohort.nf.test.snap b/tests/germlinecnvcaller_cohort.nf.test.snap index 0deade5..561f42a 100644 --- a/tests/germlinecnvcaller_cohort.nf.test.snap +++ b/tests/germlinecnvcaller_cohort.nf.test.snap @@ -172,7 +172,6 @@ "interval_list.tsv:md5,9f7718372e87e69aeb837678755a9971", "mu_ard_u_interval__.tsv:md5,f29ca1587a34a758a291eb9905057d54", "std_ard_u_interval__.tsv:md5,6989c174d280ca75ee537a0947e469e9", - "BETA-multiqc.parquet:md5,ce459aa978b3b8518a3abc2339bd7f69", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], diff --git a/tests/mutect2.nf.test.snap b/tests/mutect2.nf.test.snap index 10322a6..6d5014a 100644 --- a/tests/mutect2.nf.test.snap +++ b/tests/mutect2.nf.test.snap @@ -51,7 +51,6 @@ "sample3.vcf.gz.stats:md5,a05ace4138fc5cb993ed912d654ec22d", "sample4.vcf.gz.stats:md5,080e6d0e254e582dfb9d5916c9637391", "genome.bed:md5,472d213cfcde96565699779d5bfc0e32", - "BETA-multiqc.parquet:md5,7483f85c57b7d9e17ec1b2ed42f684b9", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], @@ -108,7 +107,6 @@ [ "sample1.vcf.gz.stats:md5,b569ce66bbffe9588b3d221e821023ee", "sample2.vcf.gz.stats:md5,76f749c53212d72e98801f6030fbf8a6", - "BETA-multiqc.parquet:md5,d5f02cd6ec31ddcd621d85446f5885db", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], From 6a8e36caca64e25d221f2adefb893d35a4bec044 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 12 Jun 2025 12:55:10 +0200 Subject: [PATCH 200/234] add to nftignore and regenerate snapshots --- tests/.nftignore | 2 ++ tests/germlinecnvcaller_cohort.nf.test.snap | 6 +----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/.nftignore b/tests/.nftignore index ca92657..2cfd58d 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -20,10 +20,12 @@ 
germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-call germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/log_q_tau_tk.tsv germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/mu_W_tu.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/mu_ard_u_interval__.tsv germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/mu_ard_u_log__.tsv germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/mu_log_mean_bias_t.tsv germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/mu_psi_t_log__.tsv germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/std_W_tu.tsv +germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/std_ard_u_interval__.tsv germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/std_ard_u_log__.tsv germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/std_log_mean_bias_t.tsv germlinecnvcaller/germlinecnvcaller/{1,2}scattered-cnv-model/{1,2}scattered-model/std_psi_t_log__.tsv diff --git a/tests/germlinecnvcaller_cohort.nf.test.snap b/tests/germlinecnvcaller_cohort.nf.test.snap index 561f42a..ae43fd4 100644 --- a/tests/germlinecnvcaller_cohort.nf.test.snap +++ b/tests/germlinecnvcaller_cohort.nf.test.snap @@ -160,8 +160,6 @@ "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", "gcnvkernel_version.json:md5,aa0ad91e0ff23be94e09e909cf2286d8", "interval_list.tsv:md5,0cba6efc5fa465f7930c901916a3e764", - "mu_ard_u_interval__.tsv:md5,1666252e672c760c1148dc20c62635ad", - "std_ard_u_interval__.tsv:md5,f6a3ec95fa4a2cc8e3d155f7e12f43a1", "calling_config.json:md5,dc78889e3d0ba1adf02d10db0194128e", "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", 
"gcnvkernel_version.json:md5,aa0ad91e0ff23be94e09e909cf2286d8", @@ -170,8 +168,6 @@ "denoising_config.json:md5,d2171570b7c746be4d2ce35d9b8db18c", "gcnvkernel_version.json:md5,aa0ad91e0ff23be94e09e909cf2286d8", "interval_list.tsv:md5,9f7718372e87e69aeb837678755a9971", - "mu_ard_u_interval__.tsv:md5,f29ca1587a34a758a291eb9905057d54", - "std_ard_u_interval__.tsv:md5,6989c174d280ca75ee537a0947e469e9", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], @@ -179,6 +175,6 @@ "nf-test": "0.9.2", "nextflow": "25.04.3" }, - "timestamp": "2025-06-12T12:17:30.06451273" + "timestamp": "2025-06-12T12:53:41.379412323" } } \ No newline at end of file From 875215ce2d10670898daa3132d69b7f4990f2317 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 12 Jun 2025 15:45:40 +0200 Subject: [PATCH 201/234] update CHANGELOG --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 99f56cc..fced38a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,4 +43,12 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n ### `Dependencies` +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `cnvkit` | | 0.9.10 | +| `gatk4` | | 4.6.1.0 | +| `gawk` | | 5.3.0 | +| `multiqc` | | 1.29 | +| `samtools` | | 1.21 | + ### `Deprecated` From 98ee3251fb89897eb248859321f7d492fd708263 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Mon, 16 Jun 2025 13:13:52 +0200 Subject: [PATCH 202/234] prepare RC 1.0.0 --- .nf-core.yml | 2 +- CHANGELOG.md | 4 ++- assets/multiqc_config.yml | 7 ++-- nextflow.config | 2 +- ro-crate-metadata.json | 76 ++++++++++++++++++++++++++++++--------- 5 files changed, 69 insertions(+), 22 deletions(-) diff --git a/.nf-core.yml b/.nf-core.yml index cecee1d..62f39aa 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -18,4 +18,4 @@ template: name: createpanelrefs org: nf-core outdir: . 
- version: 1.0dev + version: 1.0.0 diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ab03a5..a124a9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0dev - [date] +## [1.1.0](https://github.com/nf-core/createpanelrefs/releases/tag/1.1.0) Initial release of nf-core/createpanelrefs, created with the [nf-core](https://nf-co.re/) template. @@ -32,6 +32,8 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#50](https://github.com/nf-core/createpanelrefs/pull/50) - Improve references related files handling - [#50](https://github.com/nf-core/createpanelrefs/pull/50) - Heavy refactoring of the pipeline - [#52](https://github.com/nf-core/createpanelrefs/pull/52) - Template update for nf-core/tools v3.2.1 +- [#54](https://github.com/nf-core/createpanelrefs/pull/54) - Template update for nf-core/tools v3.3.1 +- [#55](https://github.com/nf-core/createpanelrefs/pull/55) - Prepare relase 1.0.0 ### `Fixed` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 767cbd1..cefa702 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,8 @@ report_comment: > - This report has been generated by the nf-core/createpanelrefs - analysis pipeline. For information about how to interpret these results, please see the - documentation. + This report has been generated by the nf-core/createpanelrefs analysis pipeline. For information about + how to interpret these results, please see the documentation. 
report_section_order: "nf-core-createpanelrefs-methods-description": order: -1000 diff --git a/nextflow.config b/nextflow.config index ca58e4c..23fc756 100644 --- a/nextflow.config +++ b/nextflow.config @@ -273,7 +273,7 @@ manifest { mainScript = 'main.nf' defaultBranch = 'master' nextflowVersion = '!>=24.04.2' - version = '1.0dev' + version = '1.0.0' doi = '' } diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 7fe2bb6..44bc9a8 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -21,9 +21,9 @@ { "@id": "./", "@type": "Dataset", - "creativeWorkStatus": "InProgress", - "datePublished": "2025-04-30T12:25:44+00:00", - "description": "

\n \n \n \"nf-core/createpanelrefs\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/createpanelrefs)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/createpanelrefs** is a bioinformatics pipeline that ...\n\n\n\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\n\nNow, you can run the pipeline using:\n\n\n\n```bash\nnextflow run nf-core/createpanelrefs \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/createpanelrefs/usage) and the [parameter documentation](https://nf-co.re/createpanelrefs/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/createpanelrefs/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/createpanelrefs/output).\n\n## Credits\n\nnf-core/createpanelrefs was originally written by @maxulysse.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\n\n## Contributions and Support\n\nIf you would like to contribute 
to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#createpanelrefs` channel](https://nfcore.slack.com/channels/createpanelrefs) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "creativeWorkStatus": "Stable", + "datePublished": "2025-06-16T11:09:19+00:00", + "description": "

\n \n \n \"nf-core/createpanelrefs\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/createpanelrefs)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)\n[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)\n[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)\n[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/createpanelrefs** is a bioinformatics helper pipeline that will help in creating panel of normals and other models.\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n2. Build Panel of Normals for [`CNVKIT`](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004873)\n3. Build ploidy and cnv calling models for [`GATK's germlinecnvcaller workflow`](https://genome.cshlp.org/content/20/9/1297)\n4. Build Panel of Normals for [`GENS`](https://github.com/Clinical-Genomics-Lund/gens)\n5. Build Panel of Normals for [`Mutect2`](https://genome.cshlp.org/content/20/9/1297)\n6. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,bam,bai,cram,crai\nsample1,sample1.bam,sample1.bai,,\nsample2,sample2.bam,,,\nsample3,sample3.bam,sample3.bai,,\nsample4,sample4.bam,,,\n```\n\nEach row in the samplesheet represents an alignment file, and it is important that you provide the files in the right format for the analysis you want to run.\n\n| Tool | Alignment format |\n| ----------------- | ---------------------------- |\n| cnvkit | bam |\n| germlinecnvcaller | bam or cram or a mix of both |\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/createpanelrefs \\\n -profile \\\n --input samplesheet.csv \\\n --tools \\\n --genome GATK.GRCh38 \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/createpanelrefs/usage) and the [parameter documentation](https://nf-co.re/createpanelrefs/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/createpanelrefs/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/createpanelrefs/output).\n\n## Credits\n\nnf-core/createpanelrefs was originally written by @maxulysse.\n@marrip contributed in the idea that started it all.\n@matthdsm and @FriederikeHanssen contributed in the actual design.\n@ramprasadn's interest was the final push that led to the creation.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @jfy133\n- @JoseEspinosa\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#createpanelrefs` channel](https://nfcore.slack.com/channels/createpanelrefs) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven 
Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -99,7 +99,7 @@ }, "mentions": [ { - "@id": "#2b320870-9e4c-42e1-a7eb-10c240208c11" + "@id": "#a37c2915-bcec-445b-b12d-068548bbba30" } ], "name": "nf-core/createpanelrefs" @@ -121,21 +121,53 @@ }, { "@id": "main.nf", - "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + "@type": [ + "File", + "SoftwareSourceCode", + "ComputationalWorkflow" + ], + "creator": [ + { + "@id": "#max.u.garcia@gmail.com" + }, + { + "@id": "https://orcid.org/0000-0001-7313-3734" + } + ], "dateCreated": "", - "dateModified": "2025-04-30T12:25:44Z", + "dateModified": "2025-06-16T13:09:19Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", - "keywords": ["nf-core", "nextflow"], - "license": ["MIT"], - "name": ["nf-core/createpanelrefs"], + "keywords": [ + "nf-core", + "nextflow" + ], + "license": [ + "MIT" + ], + "maintainer": [ + { + "@id": "#max.u.garcia@gmail.com" + }, + { + "@id": "https://orcid.org/0000-0001-7313-3734" + } + ], + "name": [ + "nf-core/createpanelrefs" + ], "programmingLanguage": { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" }, "sdPublisher": { "@id": "https://nf-co.re/" }, - "url": ["https://github.com/nf-core/createpanelrefs", "https://nf-co.re/createpanelrefs/dev/"], - "version": ["1.0dev"] + "url": [ + "https://github.com/nf-core/createpanelrefs", + "https://nf-co.re/createpanelrefs/1.0.0/" + ], + "version": [ + "1.0.0" + ] }, { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", @@ -150,11 +182,11 @@ "version": "!>=24.04.2" }, { - "@id": "#2b320870-9e4c-42e1-a7eb-10c240208c11", + "@id": "#a37c2915-bcec-445b-b12d-068548bbba30", "@type": "TestSuite", "instance": [ { - "@id": "#bff9c55b-d230-453e-89ab-f9b13e8281e0" + "@id": "#f0060111-3c61-45fc-b7c5-adf1cd2e49e1" } ], "mainEntity": { @@ -163,10 +195,10 @@ "name": "Test 
suite for nf-core/createpanelrefs" }, { - "@id": "#bff9c55b-d230-453e-89ab-f9b13e8281e0", + "@id": "#f0060111-3c61-45fc-b7c5-adf1cd2e49e1", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/createpanelrefs", - "resource": "repos/nf-core/createpanelrefs/actions/workflows/ci.yml", + "resource": "repos/nf-core/createpanelrefs/actions/workflows/nf-test.yml", "runsOn": { "@id": "https://w3id.org/ro/terms/test#GithubService" }, @@ -290,6 +322,18 @@ "@type": "Organization", "name": "nf-core", "url": "https://nf-co.re/" + }, + { + "@id": "#max.u.garcia@gmail.com", + "@type": "Person", + "email": "max.u.garcia@gmail.com", + "name": "Maxime U Garcia" + }, + { + "@id": "https://orcid.org/0000-0001-7313-3734", + "@type": "Person", + "email": "20065894+ramprasadn@users.noreply.github.com", + "name": "Ramprasad Neethiraj" } ] -} +} \ No newline at end of file From 0a8854ab060a959f4e854151f878d65750f7d183 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Mon, 16 Jun 2025 13:19:18 +0200 Subject: [PATCH 203/234] default branch is main --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 4f2237f..7ccac60 100644 --- a/nextflow.config +++ b/nextflow.config @@ -279,7 +279,7 @@ manifest { homePage = 'https://github.com/nf-core/createpanelrefs' description = """Generate Panel of Normals, models or other similar references from lots of samples""" mainScript = 'main.nf' - defaultBranch = 'master' + defaultBranch = 'main' nextflowVersion = '!>=24.04.2' version = '1.0.0' doi = '' From 9eee2512cbc2c402994e9e513d1bc7bc66688459 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Mon, 16 Jun 2025 13:21:54 +0200 Subject: [PATCH 204/234] rocrate --- ro-crate-metadata.json | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 44bc9a8..617c048 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json 
@@ -22,8 +22,8 @@ "@id": "./", "@type": "Dataset", "creativeWorkStatus": "Stable", - "datePublished": "2025-06-16T11:09:19+00:00", - "description": "

\n \n \n \"nf-core/createpanelrefs\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/createpanelrefs)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)\n[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)\n[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)\n[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/createpanelrefs** is a bioinformatics helper pipeline that will help in creating panel of normals and other models.\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n2. Build Panel of Normals for [`CNVKIT`](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004873)\n3. Build ploidy and cnv calling models for [`GATK's germlinecnvcaller workflow`](https://genome.cshlp.org/content/20/9/1297)\n4. Build Panel of Normals for [`GENS`](https://github.com/Clinical-Genomics-Lund/gens)\n5. Build Panel of Normals for [`Mutect2`](https://genome.cshlp.org/content/20/9/1297)\n6. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,bam,bai,cram,crai\nsample1,sample1.bam,sample1.bai,,\nsample2,sample2.bam,,,\nsample3,sample3.bam,sample3.bai,,\nsample4,sample4.bam,,,\n```\n\nEach row in the samplesheet represents an alignment file, and it is important that you provide the files in the right format for the analysis you want to run.\n\n| Tool | Alignment format |\n| ----------------- | ---------------------------- |\n| cnvkit | bam |\n| germlinecnvcaller | bam or cram or a mix of both |\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/createpanelrefs \\\n -profile \\\n --input samplesheet.csv \\\n --tools \\\n --genome GATK.GRCh38 \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/createpanelrefs/usage) and the [parameter documentation](https://nf-co.re/createpanelrefs/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/createpanelrefs/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/createpanelrefs/output).\n\n## Credits\n\nnf-core/createpanelrefs was originally written by @maxulysse.\n@marrip contributed in the idea that started it all.\n@matthdsm and @FriederikeHanssen contributed in the actual design.\n@ramprasadn's interest was the final push that led to the creation.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @jfy133\n- @JoseEspinosa\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#createpanelrefs` channel](https://nfcore.slack.com/channels/createpanelrefs) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven 
Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "datePublished": "2025-06-16T11:21:42+00:00", + "description": "

\n \n \n \"nf-core/createpanelrefs\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.04.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/createpanelrefs)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/createpanelrefs** is a bioinformatics helper pipeline that will help in creating panel of normals and other models.\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n2. Build Panel of Normals for [`CNVKIT`](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004873)\n3. Build ploidy and cnv calling models for [`GATK's germlinecnvcaller workflow`](https://genome.cshlp.org/content/20/9/1297)\n4. Build Panel of Normals for [`GENS`](https://github.com/Clinical-Genomics-Lund/gens)\n5. Build Panel of Normals for [`Mutect2`](https://genome.cshlp.org/content/20/9/1297)\n6. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,bam,bai,cram,crai\nsample1,sample1.bam,sample1.bai,,\nsample2,sample2.bam,,,\nsample3,sample3.bam,sample3.bai,,\nsample4,sample4.bam,,,\n```\n\nEach row in the samplesheet represents an alignment file, and it is important that you provide the files in the right format for the analysis you want to run.\n\n| Tool | Alignment format |\n| ----------------- | ---------------------------- |\n| cnvkit | bam |\n| germlinecnvcaller | bam or cram or a mix of both |\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/createpanelrefs \\\n -profile \\\n --input samplesheet.csv \\\n --tools \\\n --genome GATK.GRCh38 \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/createpanelrefs/usage) and the [parameter documentation](https://nf-co.re/createpanelrefs/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/createpanelrefs/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/createpanelrefs/output).\n\n## Credits\n\nnf-core/createpanelrefs was originally written by @maxulysse.\n@marrip contributed in the idea that started it all.\n@matthdsm and @FriederikeHanssen contributed in the actual design.\n@ramprasadn's interest was the final push that led to the creation.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @jfy133\n- @JoseEspinosa\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#createpanelrefs` channel](https://nfcore.slack.com/channels/createpanelrefs) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven 
Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -99,7 +99,7 @@ }, "mentions": [ { - "@id": "#a37c2915-bcec-445b-b12d-068548bbba30" + "@id": "#4c935683-1a92-4672-8945-28a7c2c86bfe" } ], "name": "nf-core/createpanelrefs" @@ -128,14 +128,14 @@ ], "creator": [ { - "@id": "#max.u.garcia@gmail.com" + "@id": "https://orcid.org/0000-0001-7313-3734" }, { - "@id": "https://orcid.org/0000-0001-7313-3734" + "@id": "#max.u.garcia@gmail.com" } ], "dateCreated": "", - "dateModified": "2025-06-16T13:09:19Z", + "dateModified": "2025-06-16T13:21:42Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", @@ -146,10 +146,10 @@ ], "maintainer": [ { - "@id": "#max.u.garcia@gmail.com" + "@id": "https://orcid.org/0000-0001-7313-3734" }, { - "@id": "https://orcid.org/0000-0001-7313-3734" + "@id": "#max.u.garcia@gmail.com" } ], "name": [ @@ -182,11 +182,11 @@ "version": "!>=24.04.2" }, { - "@id": "#a37c2915-bcec-445b-b12d-068548bbba30", + "@id": "#4c935683-1a92-4672-8945-28a7c2c86bfe", "@type": "TestSuite", "instance": [ { - "@id": "#f0060111-3c61-45fc-b7c5-adf1cd2e49e1" + "@id": "#9574c97c-8fec-46a8-8638-cbf7ac6ccf8b" } ], "mainEntity": { @@ -195,7 +195,7 @@ "name": "Test suite for nf-core/createpanelrefs" }, { - "@id": "#f0060111-3c61-45fc-b7c5-adf1cd2e49e1", + "@id": "#9574c97c-8fec-46a8-8638-cbf7ac6ccf8b", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/createpanelrefs", "resource": "repos/nf-core/createpanelrefs/actions/workflows/nf-test.yml", @@ -323,17 +323,17 @@ "name": "nf-core", "url": "https://nf-co.re/" }, - { - "@id": "#max.u.garcia@gmail.com", - "@type": "Person", - "email": "max.u.garcia@gmail.com", - "name": "Maxime U Garcia" - }, { "@id": "https://orcid.org/0000-0001-7313-3734", "@type": "Person", "email": "20065894+ramprasadn@users.noreply.github.com", "name": 
"Ramprasad Neethiraj" + }, + { + "@id": "#max.u.garcia@gmail.com", + "@type": "Person", + "email": "max.u.garcia@gmail.com", + "name": "Maxime U Garcia" } ] } \ No newline at end of file From 57b34a7526ba2764a3676bc5002af081b32dbfbc Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 17 Jun 2025 15:52:46 +0200 Subject: [PATCH 205/234] update CHANGELOG --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a3aceb..5882042 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [1.1.0](https://github.com/nf-core/createpanelrefs/releases/tag/1.1.0) +## [1.1.0](https://github.com/nf-core/createpanelrefs/releases/tag/1.1.0) - Hell's Gate + +Hell's Gate National Park is a national park situated near Lake Naivasha in Kenya. Initial release of nf-core/createpanelrefs, created with the [nf-core](https://nf-co.re/) template. From 623e5cd86c35e0263efe087b15a7a229b3d84e4e Mon Sep 17 00:00:00 2001 From: Daniel Mendoza Date: Wed, 18 Jun 2025 18:40:43 +0000 Subject: [PATCH 206/234] fix gcnv interval list --- nextflow_schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 11c517e..8cba190 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -244,7 +244,7 @@ "exists": true, "format": "path", "fa_icon": "fas fa-file", - "pattern": "^\\S+\\._interval_list$", + "pattern": "^\\S+\\.interval_list$", "description": "Path to directory for exclude_interval_list file.", "help_text": "If the regions you would like to exclude are in interval_list format, use this option. If you have a bed file, use `exclude` parameter instead." 
}, @@ -287,7 +287,7 @@ "exists": true, "format": "path", "fa_icon": "fas fa-file", - "pattern": "^\\S+\\._interval_list$", + "pattern": "^\\S+\\.interval_list$", "description": "Path to directory for target interval_list file.", "help_text": "If the regions you would like to analyse are in interval_list format, use this option. If you have a bed file, use `target_bed` parameter instead." }, From 58b11497c90f91205ff869a391320103f6db8d1d Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 1 Jul 2025 14:39:45 +0200 Subject: [PATCH 207/234] Fix version in CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5882042..2460960 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [1.1.0](https://github.com/nf-core/createpanelrefs/releases/tag/1.1.0) - Hell's Gate +## [1.0.0](https://github.com/nf-core/createpanelrefs/releases/tag/1.0.0) - Hell's Gate Hell's Gate National Park is a national park situated near Lake Naivasha in Kenya. 
From 125d4a46f7eb17aa94ced014cf59cdc464d79d6d Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 1 Jul 2025 14:51:52 +0200 Subject: [PATCH 208/234] assess @nvnieuwk 's release PR comment --- .github/workflows/nf-test.yml | 11 ------- assets/schema_input.json | 55 ++++++++--------------------------- 2 files changed, 12 insertions(+), 54 deletions(-) diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index 8a0bb29..f6318ca 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -1,12 +1,5 @@ name: Run nf-test on: - push: - paths-ignore: - - "docs/**" - - "**/meta.yml" - - "**/*.md" - - "**/*.png" - - "**/*.svg" pull_request: paths-ignore: - "docs/**" @@ -25,8 +18,6 @@ concurrency: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - NFT_DIFF: "pdiff" - NFT_DIFF_ARGS: "--line-numbers --width 120 --expand-tabs=2" NFT_VER: "0.9.2" NFT_WORKDIR: "~" NXF_ANSI_LOG: false @@ -101,8 +92,6 @@ jobs: - name: Run nf-test uses: ./.github/actions/nf-test env: - NFT_DIFF: ${{ env.NFT_DIFF }} - NFT_DIFF_ARGS: ${{ env.NFT_DIFF_ARGS }} NFT_WORKDIR: ${{ env.NFT_WORKDIR }} with: profile: ${{ matrix.profile }} diff --git a/assets/schema_input.json b/assets/schema_input.json index bff29a1..ec68020 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -11,69 +11,38 @@ "type": "string", "pattern": "^\\S+$", "meta": ["id"], - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "Sample name must be a string has to be provided and cannot contain spaces" }, "bam": { - "errorMessage": "BAM file cannot contain spaces and must have extension '.bam'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.bam$" - }, - { - "type": "string", - "maxLength": 0 - } - ], + "errorMessage": "BAM file cannot contain spaces, has to exist and must have extension '.bam'", + "type": "string", + "pattern": "^\\S+\\.bam$", "format": "file-path", "exists": true }, "bai": { "errorMessage": "BAM index file 
cannot contain spaces and must have extension '.bai'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.bai$" - }, - { - "type": "string", - "maxLength": 0 - } - ], + "type": "string", + "pattern": "^\\S+\\.bai$", "format": "file-path", "exists": true }, "cram": { "errorMessage": "CRAM file cannot contain spaces and must have extension '.cram'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.cram$" - }, - { - "type": "string", - "maxLength": 0 - } - ], + "type": "string", + "pattern": "^\\S+\\.cram$", "format": "file-path", "exists": true }, "crai": { "errorMessage": "CRAM index file cannot contain spaces and must have extension '.crai'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.crai$" - }, - { - "type": "string", - "maxLength": 0 - } - ], + "type": "string", + "pattern": "^\\S+\\.crai$", "format": "file-path", "exists": true } }, - "required": ["sample"] + "required": ["sample"], + "oneOf": [{ "required": ["bam"] }, { "required": ["cram"] }] } } From 8de2144dc1ded97691409e8a797778f2ec7c5fe4 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 1 Jul 2025 14:56:02 +0200 Subject: [PATCH 209/234] update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2460960..d99d8a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,6 +43,8 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#50](https://github.com/nf-core/createpanelrefs/pull/50) - Fix mutect2 that wasn't working without a bed file - [#53](https://github.com/nf-core/createpanelrefs/pull/53) - Minor syntax fixes due to [#50](https://github.com/nf-core/createpanelrefs/pull/50) - [#54](https://github.com/nf-core/createpanelrefs/pull/54) - Fix name for `_mqc_versions.yml` file +- [#56](https://github.com/nf-core/createpanelrefs/pull/56) - Fix gcnv interval list +- [#57](https://github.com/nf-core/createpanelrefs/pull/57) - Improve syntax in `assets/schema_input.json` file, from 
@nvnieuwk in [#46](https://github.com/nf-core/createpanelrefs/pull/46) ### `Dependencies` From a444cc441aef54384e6d2c223014318fe8efd2c5 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 1 Jul 2025 15:13:13 +0200 Subject: [PATCH 210/234] update docs --- docs/output.md | 41 +++++++++++++++++++++++++++++++++++++++++ docs/usage.md | 18 +++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index e04d535..b2c3caf 100644 --- a/docs/output.md +++ b/docs/output.md @@ -11,6 +11,8 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: - [GATK's germlinecnvcaller](#germlinecnvcaller) - Publish read counts, ploidy and cnvcalling models that can be used to call cnv's in the case mode. +- [GATK's Mutect2](#gatk-mutect2) - Create panel of normals for somatic variant calling. +- [GENS](#gens) - Create panel of normals for read-count denoising. - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution @@ -34,6 +36,45 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d [GATK](https://github.com/broadinstitute/gatk) is a toolkit which offers a wide variety of tools with a primary focus on variant discovery and genotyping. In this pipeline we have implemented GATK's germlinecnvcalling workflow for analysing a cohort of samples. The output files generated from this analysis can be used for analysing samples in case mode. For more information about the workflow and output files, see GATK's documentation [here.](https://gatk.broadinstitute.org/hc/en-us/articles/360035531152--How-to-Call-common-and-rare-germline-copy-number-variants) +### GATK Mutect2 + +
+Output files + +- `results/gatk4/` + - `mutect2/` + - `*.vcf.gz`: Compressed VCF files containing somatic variant calls for each sample. + - `*.vcf.gz.tbi`: Tabix index files for the VCF files. + - `*.vcf.gz.stats`: Statistics files containing detailed metrics for each sample. + - `*.f1r2.tar.gz`: Files containing information for LearnReadOrientationModel (only output when tumor-normal pair mode is run). + - `genomicsdb/` + - `{pon_name}/`: GenomicsDB workspace containing all sample VCFs combined. + - `createsomaticpanelofnormals/` + - `{pon_name}.vcf.gz`: Final panel of normals VCF file. + - `{pon_name}.vcf.gz.tbi`: Tabix index for the panel of normals VCF. + +
+ +[GATK Mutect2](https://gatk.broadinstitute.org/hc/en-us/articles/360035894731-Mutect2) creates a panel of normals from normal samples for somatic variant calling. The workflow: (1) calls variants in each normal sample using Mutect2 in panel of normals mode, (2) imports all VCFs into a GenomicsDB workspace, and (3) creates a final panel of normals VCF file. This panel can be used with Mutect2 in case mode via the `--panel-of-normals` parameter to filter out common germline variants and sequencing artifacts. + +### GENS + +
+Output files + +- `results/gatk4/` + - `collectreadcounts/` + - `*.hdf5`: Read count data in HDF5 format for each sample. + - `*.tsv`: Read count data in TSV format for each sample. + - `createreadcountpanelofnormals/` + - `{pon_name}.hdf5`: Final panel of normals file in HDF5 format. + - `references/intervals/gens_pon/` + - `*.interval_list`: Interval list file used for read count collection. + +
+ +[GENS](https://github.com/Clinical-Genomics-Lund/gens) creates a panel of normals for read-count denoising to improve somatic variant detection. The workflow: (1) indexes BAM/CRAM files if needed, (2) collects read counts at specified intervals using GATK's CollectReadCounts, and (3) creates a panel of normals using GATK's CreateReadCountPanelOfNormals. This panel can be used with GENS for somatic variant calling to reduce technical noise and improve variant detection sensitivity. + ### MultiQC
diff --git a/docs/usage.md b/docs/usage.md index 290002a..cd6f71a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -125,7 +125,7 @@ process { ### gens -If you are running the pipeline to generate references for the gens workflow, you should ensure that you have provided all the mandatory options specified in the table below. +If you are running the pipeline to generate references for the GENS workflow, you should ensure that you have provided all the mandatory options specified in the table below. | Mandatory | Optional | | ------------- | --------------------- | @@ -134,6 +134,9 @@ If you are running the pipeline to generate references for the gens workflow, yo | | gens_bin_length | | | gens_pon_name | | | gens_readcount_format | +| | gens_interval_list | + +The GENS workflow creates a panel of normals for read-count denoising from normal samples. This panel can be used with GENS for somatic variant calling to reduce technical noise and improve variant detection. For more information, see the [GENS documentation](https://github.com/Clinical-Genomics-Lund/gens). ### germlinecnvcaller @@ -155,6 +158,19 @@ If you are running the pipeline to generate references for the GATK's germlinecn 1 To learn more about this file, see [this comment](https://gatk.broadinstitute.org/hc/en-us/community/posts/360074399831/comments/13441240230299) on GATK forum.
+### mutect2 + +If you are running the pipeline to generate references for the GATK's Mutect2 somatic variant calling workflow, you should ensure that you have provided all the mandatory options specified in the table below. + +| Mandatory | Optional | +| ------------- | ------------------ | +| fasta/genomes | fai | +| | dict | +| | mutect2_target_bed | +| | mutect2_pon_name | + +The Mutect2 workflow creates a panel of normals from normal samples for somatic variant calling. This panel can be used with Mutect2 in case mode via the `--panel-of-normals` parameter to filter out common germline variants and sequencing artifacts. For more information, see the [GATK documentation](https://gatk.broadinstitute.org/hc/en-us/articles/360035894731-Mutect2). + ## Core Nextflow arguments > [!NOTE] From 6c7af7cb753736ad042ff4dd6f84ca97fa69aecc Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 1 Jul 2025 15:14:22 +0200 Subject: [PATCH 211/234] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d99d8a3..385348c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#54](https://github.com/nf-core/createpanelrefs/pull/54) - Fix name for `_mqc_versions.yml` file - [#56](https://github.com/nf-core/createpanelrefs/pull/56) - Fix gcnv interval list - [#57](https://github.com/nf-core/createpanelrefs/pull/57) - Improve syntax in `assets/schema_input.json` file, from @nvnieuwk in [#46](https://github.com/nf-core/createpanelrefs/pull/46) +- [#57](https://github.com/nf-core/createpanelrefs/pull/57) - Fix missing documentation for GATK Mutect2 and GENS ### `Dependencies` From a41f920207cf85bfcf59721fbef106106938e458 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 8 Jul 2025 11:38:20 +0000 Subject: [PATCH 212/234] Template update for nf-core/tools version 3.3.2 --- .github/PULL_REQUEST_TEMPLATE.md | 4 +- 
.github/actions/nf-test/action.yml | 4 - .github/workflows/linting.yml | 2 +- .github/workflows/linting_comment.yml | 2 +- .github/workflows/nf-test.yml | 45 +++---- .github/workflows/release-announcements.yml | 2 +- .nf-core.yml | 4 +- .pre-commit-config.yaml | 2 +- CHANGELOG.md | 2 +- README.md | 6 +- assets/multiqc_config.yml | 4 +- assets/schema_input.json | 6 +- conf/base.config | 1 + modules.json | 4 +- modules/nf-core/fastqc/environment.yml | 2 + modules/nf-core/fastqc/main.nf | 2 +- modules/nf-core/fastqc/meta.yml | 23 ++-- modules/nf-core/multiqc/environment.yml | 4 +- modules/nf-core/multiqc/main.nf | 4 +- modules/nf-core/multiqc/meta.yml | 110 ++++++++++-------- .../nf-core/multiqc/tests/main.nf.test.snap | 18 +-- nextflow.config | 11 +- nextflow_schema.json | 2 +- nf-test.config | 2 +- ro-crate-metadata.json | 22 ++-- .../tests/nextflow.config | 2 +- tests/.nftignore | 1 + tests/nextflow.config | 6 +- 28 files changed, 160 insertions(+), 137 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index af974b5..4d074d2 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -8,14 +8,14 @@ These are the most common things requested on pull requests (PRs). Remember that PRs should be made against the dev branch, unless you're preparing a pipeline release. -Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/createpanelrefs/tree/master/.github/CONTRIBUTING.md) +Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/createpanelrefs/tree/main/.github/CONTRIBUTING.md) --> ## PR checklist - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! 
-- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/createpanelrefs/tree/master/.github/CONTRIBUTING.md) +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/createpanelrefs/tree/main/.github/CONTRIBUTING.md) - [ ] If necessary, also make a PR on the nf-core/createpanelrefs _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core pipelines lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml index 243e782..bf44d96 100644 --- a/.github/actions/nf-test/action.yml +++ b/.github/actions/nf-test/action.yml @@ -54,13 +54,9 @@ runs: conda-solver: libmamba conda-remove-defaults: true - # TODO Skip failing conda tests and document their failures - # https://github.com/nf-core/modules/issues/7017 - name: Run nf-test shell: bash env: - NFT_DIFF: ${{ env.NFT_DIFF }} - NFT_DIFF_ARGS: ${{ env.NFT_DIFF_ARGS }} NFT_WORKDIR: ${{ env.NFT_WORKDIR }} run: | nf-test test \ diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index f2d7d1d..8b0f88c 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -13,7 +13,7 @@ jobs: steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - - name: Set up Python 3.12 + - name: Set up Python 3.13 uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: "3.13" diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 7e8050f..d43797d 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: 
dawidd6/action-download-artifact@4c1e823582f43b179e2cbb49c3eade4e41f992e2 # v10 + uses: dawidd6/action-download-artifact@ac66b43f0e6a346234dd65d4d0c8fbb31cb316e5 # v11 with: workflow: linting.yml workflow_conclusion: completed diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index f03aea0..e7b5844 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -1,12 +1,5 @@ name: Run nf-test on: - push: - paths-ignore: - - "docs/**" - - "**/meta.yml" - - "**/*.md" - - "**/*.png" - - "**/*.svg" pull_request: paths-ignore: - "docs/**" @@ -35,7 +28,7 @@ jobs: nf-test-changes: name: nf-test-changes runs-on: # use self-hosted runners - - runs-on=$-nf-test-changes + - runs-on=${{ github.run_id }}-nf-test-changes - runner=4cpu-linux-x64 outputs: shard: ${{ steps.set-shards.outputs.shard }} @@ -69,7 +62,7 @@ jobs: needs: [nf-test-changes] if: ${{ needs.nf-test-changes.outputs.total_shards != '0' }} runs-on: # use self-hosted runners - - runs-on=$-nf-test + - runs-on=${{ github.run_id }}-nf-test - runner=4cpu-linux-x64 strategy: fail-fast: false @@ -85,7 +78,7 @@ jobs: - isMain: false profile: "singularity" NXF_VER: - - "24.04.2" + - "24.10.5" - "latest-everything" env: NXF_ANSI_LOG: false @@ -97,23 +90,39 @@ jobs: fetch-depth: 0 - name: Run nf-test + id: run_nf_test uses: ./.github/actions/nf-test + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} env: - NFT_DIFF: ${{ env.NFT_DIFF }} - NFT_DIFF_ARGS: ${{ env.NFT_DIFF_ARGS }} NFT_WORKDIR: ${{ env.NFT_WORKDIR }} with: profile: ${{ matrix.profile }} shard: ${{ matrix.shard }} total_shards: ${{ env.TOTAL_SHARDS }} + + - name: Report test status + if: ${{ always() }} + run: | + if [[ "${{ steps.run_nf_test.outcome }}" == "failure" ]]; then + echo "::error::Test with ${{ matrix.NXF_VER }} failed" + # Add to workflow summary + echo "## ❌ Test failed: ${{ matrix.profile }} | ${{ matrix.NXF_VER }} | Shard ${{ matrix.shard }}/${{ env.TOTAL_SHARDS }}" >> $GITHUB_STEP_SUMMARY 
+ if [[ "${{ matrix.NXF_VER }}" == "latest-everything" ]]; then + echo "::warning::Test with latest-everything failed but will not cause workflow failure. Please check if the error is expected or if it needs fixing." + fi + if [[ "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then + exit 1 + fi + fi + confirm-pass: needs: [nf-test] if: always() runs-on: # use self-hosted runners - - runs-on=$-confirm-pass + - runs-on=${{ github.run_id }}-confirm-pass - runner=2cpu-linux-x64 steps: - - name: One or more tests failed + - name: One or more tests failed (excluding latest-everything) if: ${{ contains(needs.*.result, 'failure') }} run: exit 1 @@ -132,11 +141,3 @@ jobs: echo "DEBUG: toJSON(needs) = ${{ toJSON(needs) }}" echo "DEBUG: toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" echo "::endgroup::" - - - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner - if: always() - run: | - ls -la ./ - rm -rf ./* || true - rm -rf ./.??* || true - ls -la ./ diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index 4abaf48..0f73249 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -30,7 +30,7 @@ jobs: bsky-post: runs-on: ubuntu-latest steps: - - uses: zentered/bluesky-post-action@4aa83560bb3eac05dbad1e5f221ee339118abdd2 # v0.2.0 + - uses: zentered/bluesky-post-action@6461056ea355ea43b977e149f7bf76aaa572e5e8 # v0.3.0 with: post: | Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! diff --git a/.nf-core.yml b/.nf-core.yml index 28e1a22..c72ef97 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -6,7 +6,7 @@ lint: - .gitattributes - .gitignore modules_config: false -nf_core_version: 3.3.1 +nf_core_version: 3.3.2 repository_type: pipeline template: author: "@maxulysse" @@ -17,4 +17,4 @@ template: name: createpanelrefs org: nf-core outdir: . 
- version: 1.0dev + version: 1.0.0 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9d0b248..bb41bee 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,7 @@ repos: hooks: - id: prettier additional_dependencies: - - prettier@3.5.0 + - prettier@3.6.2 - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: diff --git a/CHANGELOG.md b/CHANGELOG.md index 92e5780..fcc9997 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0dev - [date] +## v1.0.0 - [date] Initial release of nf-core/createpanelrefs, created with the [nf-core](https://nf-co.re/) template. diff --git a/README.md b/README.md index 489dfbb..e51251f 100644 --- a/README.md +++ b/README.md @@ -5,12 +5,12 @@ -[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml) +[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml) [![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) 
-[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.04.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) -[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.1) +[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) +[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 767cbd1..a905e53 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/createpanelrefs + This report has been generated by the nf-core/createpanelrefs analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. 
report_section_order: "nf-core-createpanelrefs-methods-description": order: -1000 diff --git a/assets/schema_input.json b/assets/schema_input.json index e749db3..ec9b8f5 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/nf-core/createpanelrefs/master/assets/schema_input.json", + "$id": "https://raw.githubusercontent.com/nf-core/createpanelrefs/main/assets/schema_input.json", "title": "nf-core/createpanelrefs pipeline - params.input schema", "description": "Schema for the file provided with params.input", "type": "array", @@ -17,14 +17,14 @@ "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, "fastq_2": { "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "pattern": "^([\\S\\s]*\\/)?[^\\s\\/]+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" } }, diff --git a/conf/base.config b/conf/base.config index be34b4d..5da325b 100644 --- a/conf/base.config +++ b/conf/base.config @@ -61,5 +61,6 @@ process { } withLabel: process_gpu { ext.use_gpu = { workflow.profile.contains('gpu') } + accelerator = { workflow.profile.contains('gpu') ? 
1 : null } } } diff --git a/modules.json b/modules.json index 2681663..3d174e2 100644 --- a/modules.json +++ b/modules.json @@ -7,12 +7,12 @@ "nf-core": { "fastqc": { "branch": "master", - "git_sha": "08108058ea36a63f141c25c4e75f9f872a5b2296", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "f0719ae309075ae4a291533883847c3f7c441dad", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", "installed_by": ["modules"] } } diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml index 691d4c7..f9f54ee 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/fastqc/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 033f415..23e1663 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -29,7 +29,7 @@ process FASTQC { // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label - def memory_in_mb = task.memory ? task.memory.toUnit('MB').toFloat() / task.cpus : null + def memory_in_mb = task.memory ? task.memory.toUnit('MB') / task.cpus : null // FastQC memory value allowed range (100 - 10000) def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 
100 : memory_in_mb) diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 2b2e62b..c8d9d02 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -29,9 +29,10 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. + ontologies: [] output: - - html: - - meta: + html: + - - meta: type: map description: | Groovy Map containing sample information @@ -40,8 +41,9 @@ output: type: file description: FastQC report pattern: "*_{fastqc.html}" - - zip: - - meta: + ontologies: [] + zip: + - - meta: type: map description: | Groovy Map containing sample information @@ -50,11 +52,14 @@ output: type: file description: FastQC report archive pattern: "*_{fastqc.zip}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@drpatelh" - "@grst" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index a27122c..812fc4c 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,5 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::multiqc=1.27 + - bioconda::multiqc=1.29 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 58d9313..0ac3c36 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.27--pyhdfd78af_0' : - 'biocontainers/multiqc:1.27--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.29--pyhdfd78af_0' : + 'biocontainers/multiqc:1.29--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index b16c187..ce30eb7 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -15,57 +15,71 @@ tools: licence: ["GPL-3.0-or-later"] identifier: biotools:multiqc input: - - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections - in multiqc_config. - pattern: "*.{yml,yaml}" - - - multiqc_logo: + - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + ontologies: [] + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + ontologies: [] + - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. 
+ pattern: "*.{tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. + pattern: "*.{tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV +output: + report: + - "*multiqc_report.html": type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" - - - replace_names: + description: MultiQC report file + pattern: "multiqc_report.html" + ontologies: [] + data: + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" + plots: + - "*_plots": type: file - description: | - Optional two-column sample renaming file. First column a set of - patterns, second column a set of corresponding replacements. Passed via - MultiQC's `--replace-names` option. - pattern: "*.{tsv}" - - - sample_names: + description: Plots created by MultiQC + pattern: "*_data" + ontologies: [] + versions: + - versions.yml: type: file - description: | - Optional TSV file with headers, passed to the MultiQC --sample_names - argument. 
- pattern: "*.{tsv}" -output: - - report: - - "*multiqc_report.html": - type: file - description: MultiQC report file - pattern: "multiqc_report.html" - - data: - - "*_data": - type: directory - description: MultiQC data dir - pattern: "multiqc_data" - - plots: - - "*_plots": - type: file - description: Plots created by MultiQC - pattern: "*_data" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@abhi18av" - "@bunop" diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index 7b7c132..88e9057 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,8f3b8c1cec5388cf2708be948c9fa42f" + "versions.yml:md5,c1fe644a37468f6dae548d98bc72c2c1" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.4" + "nextflow": "25.04.2" }, - "timestamp": "2025-01-27T09:29:57.631982377" + "timestamp": "2025-05-22T11:50:41.182332996" }, "multiqc_stub": { "content": [ @@ -17,25 +17,25 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,8f3b8c1cec5388cf2708be948c9fa42f" + "versions.yml:md5,c1fe644a37468f6dae548d98bc72c2c1" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.4" + "nextflow": "25.04.2" }, - "timestamp": "2025-01-27T09:30:34.743726958" + "timestamp": "2025-05-22T11:51:22.448739369" }, "multiqc_versions_config": { "content": [ [ - "versions.yml:md5,8f3b8c1cec5388cf2708be948c9fa42f" + "versions.yml:md5,c1fe644a37468f6dae548d98bc72c2c1" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.4" + "nextflow": "25.04.2" }, - "timestamp": "2025-01-27T09:30:21.44383553" + "timestamp": "2025-05-22T11:51:06.198928424" } } \ No newline at 
end of file diff --git a/nextflow.config b/nextflow.config index 24e852e..cf6ffa7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -235,7 +235,6 @@ dag { manifest { name = 'nf-core/createpanelrefs' - author = """@maxulysse""" // The author field is deprecated from Nextflow version 24.10.0, use contributors instead contributors = [ // TODO nf-core: Update the field with the details of the contributors to your pipeline. New with Nextflow version 24.10.0 [ @@ -250,15 +249,15 @@ manifest { homePage = 'https://github.com/nf-core/createpanelrefs' description = """Generate Panel of Normals, models or other similar references from lots of samples""" mainScript = 'main.nf' - defaultBranch = 'master' - nextflowVersion = '!>=24.04.2' - version = '1.0dev' + defaultBranch = 'main' + nextflowVersion = '!>=24.10.5' + version = '1.0.0' doi = '' } // Nextflow plugins plugins { - id 'nf-schema@2.3.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.4.2' // Validation of pipeline parameters and creation of an input channel from a sample sheet } validation { @@ -284,7 +283,7 @@ validation { https://doi.org/10.1038/s41587-020-0439-x * Software dependencies - https://github.com/nf-core/createpanelrefs/blob/master/CITATIONS.md + https://github.com/nf-core/createpanelrefs/blob/main/CITATIONS.md """ } summary { diff --git a/nextflow_schema.json b/nextflow_schema.json index 44e4708..9fbd822 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/nf-core/createpanelrefs/master/nextflow_schema.json", + "$id": "https://raw.githubusercontent.com/nf-core/createpanelrefs/main/nextflow_schema.json", "title": "nf-core/createpanelrefs pipeline parameters", "description": "Generate Panel of Normals, models or other similar references from lots of samples", "type": "object", diff --git a/nf-test.config 
b/nf-test.config index 889df76..3a1fff5 100644 --- a/nf-test.config +++ b/nf-test.config @@ -9,7 +9,7 @@ config { configFile "tests/nextflow.config" // ignore tests coming from the nf-core/modules repo - ignore 'modules/nf-core/**/*', 'subworkflows/nf-core/**/*' + ignore 'modules/nf-core/**/tests/*', 'subworkflows/nf-core/**/tests/*' // run all test with defined profile(s) from the main nextflow.config profile "test" diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 2494148..38ac151 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -21,9 +21,9 @@ { "@id": "./", "@type": "Dataset", - "creativeWorkStatus": "InProgress", - "datePublished": "2025-06-03T11:01:15+00:00", - "description": "

\n \n \n \"nf-core/createpanelrefs\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.04.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/createpanelrefs)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/createpanelrefs** is a bioinformatics pipeline that ...\n\n\n\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\n\nNow, you can run the pipeline using:\n\n\n\n```bash\nnextflow run nf-core/createpanelrefs \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/createpanelrefs/usage) and the [parameter documentation](https://nf-co.re/createpanelrefs/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/createpanelrefs/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/createpanelrefs/output).\n\n## Credits\n\nnf-core/createpanelrefs was originally written by @maxulysse.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#createpanelrefs` channel](https://nfcore.slack.com/channels/createpanelrefs) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "creativeWorkStatus": "Stable", + "datePublished": "2025-07-08T11:38:16+00:00", + "description": "

\n \n \n \"nf-core/createpanelrefs\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/createpanelrefs)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/createpanelrefs** is a bioinformatics pipeline that ...\n\n\n\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\n\nNow, you can run the pipeline using:\n\n\n\n```bash\nnextflow run nf-core/createpanelrefs \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/createpanelrefs/usage) and the [parameter documentation](https://nf-co.re/createpanelrefs/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/createpanelrefs/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/createpanelrefs/output).\n\n## Credits\n\nnf-core/createpanelrefs was originally written by @maxulysse.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#createpanelrefs` channel](https://nfcore.slack.com/channels/createpanelrefs) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -99,7 +99,7 @@ }, "mentions": [ { - "@id": "#3adc6bc9-5659-4ed2-aff7-659417321aab" + "@id": "#c92fc7bd-512a-4c0c-8702-9b0f88a36652" } ], "name": "nf-core/createpanelrefs" @@ -123,7 +123,7 @@ "@id": "main.nf", "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], "dateCreated": "", - "dateModified": "2025-06-03T11:01:15Z", + "dateModified": "2025-07-08T11:38:16Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": ["nf-core", "nextflow"], "license": ["MIT"], @@ -134,8 +134,8 @@ "sdPublisher": { "@id": "https://nf-co.re/" }, - "url": ["https://github.com/nf-core/createpanelrefs", "https://nf-co.re/createpanelrefs/dev/"], - "version": ["1.0dev"] + "url": ["https://github.com/nf-core/createpanelrefs", "https://nf-co.re/createpanelrefs/1.0.0/"], + "version": ["1.0.0"] }, { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", @@ -147,14 +147,14 @@ "url": { "@id": "https://www.nextflow.io/" }, - "version": "!>=24.04.2" + "version": "!>=24.10.5" }, { - "@id": "#3adc6bc9-5659-4ed2-aff7-659417321aab", + "@id": "#c92fc7bd-512a-4c0c-8702-9b0f88a36652", "@type": "TestSuite", "instance": [ { - "@id": "#0f1c2baa-1425-40cc-9f1d-c2c88cf08e1b" + "@id": "#9cfceb7e-0134-4fcf-a98b-804534d859ad" } ], "mainEntity": { @@ -163,7 +163,7 @@ "name": "Test suite for nf-core/createpanelrefs" }, { - "@id": "#0f1c2baa-1425-40cc-9f1d-c2c88cf08e1b", + "@id": "#9cfceb7e-0134-4fcf-a98b-804534d859ad", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/createpanelrefs", "resource": "repos/nf-core/createpanelrefs/actions/workflows/nf-test.yml", diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config index 0907ac5..09ef842 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config 
+++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config @@ -1,5 +1,5 @@ plugins { - id "nf-schema@2.1.0" + id "nf-schema@2.4.2" } validation { diff --git a/tests/.nftignore b/tests/.nftignore index c10bc1f..158c83c 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -1,5 +1,6 @@ .DS_Store multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt +multiqc/multiqc_data/BETA-multiqc.parquet multiqc/multiqc_data/multiqc.log multiqc/multiqc_data/multiqc_data.json multiqc/multiqc_data/multiqc_sources.txt diff --git a/tests/nextflow.config b/tests/nextflow.config index 70081af..f61b760 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -6,7 +6,9 @@ // TODO nf-core: Specify any additional parameters here // Or any resources requirements -params.modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' -params.pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/createpanelrefs' +params { + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/createpanelrefs' +} aws.client.anonymous = true // fixes S3 access issues on self-hosted runners From 61f89300da9ef672c70f532e04f3ad2bc7cd6ff9 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 10 Jul 2025 13:12:42 +0200 Subject: [PATCH 213/234] update nf-core.yml --- .nf-core.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.nf-core.yml b/.nf-core.yml index c72ef97..95b4c4b 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,10 +1,8 @@ lint: files_exist: - - .github/workflows/ci.yml - conf/modules.config files_unchanged: - - .gitattributes - - .gitignore + - .github/PULL_REQUEST_TEMPLATE.md modules_config: false nf_core_version: 3.3.2 repository_type: pipeline From c96a20fbccdaefa23f5d9cb9facef5228d00f105 Mon Sep 17 00:00:00 2001 
From: maxulysse Date: Thu, 10 Jul 2025 13:14:32 +0200 Subject: [PATCH 214/234] fix linting --- .gitattributes | 1 - .gitignore | 10 +++++----- .prettierignore | 17 ++++++++--------- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/.gitattributes b/.gitattributes index 31ba574..7a2dabc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,4 +2,3 @@ *.nf.test linguist-language=nextflow modules/nf-core/** linguist-generated subworkflows/nf-core/** linguist-generated -tests/**/*nf.test.snap linguist-generated diff --git a/.gitignore b/.gitignore index cc50d2f..9e30720 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,11 @@ -*.pyc -.DS_Store .nextflow* -.nf-test* +work/ data/ results/ +.DS_Store +testing/ testing* +*.pyc null/ -testing/ -work/ +.nf-test .nf-test.log diff --git a/.prettierignore b/.prettierignore index e8e679a..edd29f0 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,14 +1,13 @@ -*.pyc -.DS_Store -.nextflow* -.nf-test/ +email_template.html adaptivecard.json -bin/ +slackreport.json +.nextflow* +work/ data/ -email_template.html results/ -slackreport.json +.DS_Store +testing/ testing* +*.pyc +bin/ ro-crate-metadata.json -testing/ -work/ From 70e71230e8b2f8850335ce58fddd488f0365a1d7 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 23 Sep 2025 11:42:47 +0200 Subject: [PATCH 215/234] no ext.when in configs --- conf/modules/cnvkit.config | 2 -- conf/modules/gens_pon.config | 2 -- conf/modules/germlinecnvcaller_cohort.config | 18 ------------------ conf/modules/mutect2.config | 2 -- 4 files changed, 24 deletions(-) diff --git a/conf/modules/cnvkit.config b/conf/modules/cnvkit.config index 74ae34c..c0396a0 100644 --- a/conf/modules/cnvkit.config +++ b/conf/modules/cnvkit.config @@ -11,7 +11,6 @@ */ process { - withName: SAMTOOLS_VIEW { ext.args = {"--output-fmt bam"} } @@ -26,5 +25,4 @@ process { ] ] } - } diff --git a/conf/modules/gens_pon.config b/conf/modules/gens_pon.config index e2ac2ae..e0d99c8 100644 --- 
a/conf/modules/gens_pon.config +++ b/conf/modules/gens_pon.config @@ -11,7 +11,6 @@ */ process { - withName: '.*GENS_PON.*' { publishDir = [ enabled: false @@ -36,5 +35,4 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - } diff --git a/conf/modules/germlinecnvcaller_cohort.config b/conf/modules/germlinecnvcaller_cohort.config index 8a4d21e..0037bca 100644 --- a/conf/modules/germlinecnvcaller_cohort.config +++ b/conf/modules/germlinecnvcaller_cohort.config @@ -11,29 +11,12 @@ */ process { - withName: '.*GERMLINECNVCALLER_COHORT.*' { publishDir = [ enabled: false ] } - withName: '.*GERMLINECNVCALLER_COHORT:GATK4_INDEXFEATUREFILE_MAPPABILITY' { - ext.when = { !params.gcnv_mappable_regions.equals(null) } - } - - withName: '.*GERMLINECNVCALLER_COHORT:GATK4_INDEXFEATUREFILE_SEGDUP' { - ext.when = { !params.gcnv_segmental_duplications.equals(null) } - } - - withName: '.*GERMLINECNVCALLER_COHORT:GATK4_BEDTOINTERVALLIST_TARGETS' { - ext.when = { params.gcnv_analysis_type.equals("wes") && params.gcnv_target_interval_list.equals(null) && params.gcnv_target_bed } - } - - withName: '.*GERMLINECNVCALLER_COHORT:GATK4_BEDTOINTERVALLIST_EXCLUDE' { - ext.when = { params.gcnv_analysis_type.equals("wes") && params.gcnv_exclude_interval_list.equals(null) && params.gcnv_exclude_bed } - } - withName: '.*GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS' { ext.args = { ["--imr OVERLAPPING_ONLY", "--padding ${params.gcnv_padding}", @@ -82,5 +65,4 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - } diff --git a/conf/modules/mutect2.config b/conf/modules/mutect2.config index 9bbbb9d..7e56a51 100644 --- a/conf/modules/mutect2.config +++ b/conf/modules/mutect2.config @@ -11,9 +11,7 @@ */ process { - withName: '.*BAM_CREATE_SOM_PON_GATK:GATK4_MUTECT2' { ext.args = "--max-mnp-distance 0" } - } From 2380b5eea569324354f532692121f81ba0253bc5 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 23 Sep 2025 11:50:48 +0200 Subject: [PATCH 216/234] update docs --- docs/output.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/output.md b/docs/output.md index b2c3caf..3480956 100644 --- a/docs/output.md +++ b/docs/output.md @@ -10,12 +10,30 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: +- [CNVKit](#cnvkit) - Create reference files for copy number variant detection from sequencing data. - [GATK's germlinecnvcaller](#germlinecnvcaller) - Publish read counts, ploidy and cnvcalling models that can be used to call cnv's in the case mode. - [GATK's Mutect2](#gatk-mutect2) - Create panel of normals for somatic variant calling. - [GENS](#gens) - Create panel of normals for read-count denoising. - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +### CNVKit + +
<details markdown="1"> +<summary>Output files</summary> + +- `results/reference/cnvkit/` + - "panel.cnn": Panel reference file containing coverage information for copy number calling. + - "<sample>.antitargetcoverage.cnn": Antitarget coverage file for each sample. + - "<sample>.targetcoverage.cnn": Target coverage file for each sample. + +</details>
+ +[CNVKit](https://cnvkit.readthedocs.io/en/stable/index.html) is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. +In this pipeline, CNVKit creates reference files that can be used for copy number variant detection. +The workflow processes normal samples to generate a reference CNN file that captures the baseline coverage patterns, which can then be used for tumor-only or tumor-normal CNV analysis in downstream applications. +The reference file contains coverage information normalized across the cohort and is essential for accurate copy number calling. + ### GATK germlinecnvcaller
From 61fe91bdb8eb5a0bdbbcbe87773ef514fafb3117 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Tue, 23 Sep 2025 11:54:02 +0200 Subject: [PATCH 217/234] steal test data from sarek --- conf/test_full.config | 4 ++-- ro-crate-metadata.json | 40 ++++++++++++++++++++++++++++++++++------ 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/conf/test_full.config b/conf/test_full.config index cb4a8c6..860f66b 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,8 +17,8 @@ params { // Input data for full size test // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + input = "https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/NA12878_Agilent_full_test.csv" // Genome references - genome = 'R64-1-1' + genome = 'GATK.GRCh38' } diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 5cf12cd..cb57ff3 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -22,7 +22,7 @@ "@id": "./", "@type": "Dataset", "creativeWorkStatus": "Stable", - "datePublished": "2025-07-10T10:35:56+00:00", + "datePublished": "2025-09-23T09:50:54+00:00", "description": "

\n \n \n \"nf-core/createpanelrefs\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/createpanelrefs)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/createpanelrefs** is a bioinformatics helper pipeline that will help in creating panel of normals and other models.\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n2. Build Panel of Normals for [`CNVKIT`](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004873)\n3. Build ploidy and cnv calling models for [`GATK's germlinecnvcaller workflow`](https://genome.cshlp.org/content/20/9/1297)\n4. Build Panel of Normals for [`GENS`](https://github.com/Clinical-Genomics-Lund/gens)\n5. Build Panel of Normals for [`Mutect2`](https://genome.cshlp.org/content/20/9/1297)\n6. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,bam,bai,cram,crai\nsample1,sample1.bam,sample1.bai,,\nsample2,sample2.bam,,,\nsample3,sample3.bam,sample3.bai,,\nsample4,sample4.bam,,,\n```\n\nEach row in the samplesheet represents an alignment file, and it is important that you provide the files in the right format for the analysis you want to run.\n\n| Tool | Alignment format |\n| ----------------- | ---------------------------- |\n| cnvkit | bam |\n| germlinecnvcaller | bam or cram or a mix of both |\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/createpanelrefs \\\n -profile \\\n --input samplesheet.csv \\\n --tools \\\n --genome GATK.GRCh38 \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/createpanelrefs/usage) and the [parameter documentation](https://nf-co.re/createpanelrefs/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/createpanelrefs/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/createpanelrefs/output).\n\n## Credits\n\nnf-core/createpanelrefs was originally written by @maxulysse.\n@marrip contributed in the idea that started it all.\n@matthdsm and @FriederikeHanssen contributed in the actual design.\n@ramprasadn's interest was the final push that led to the creation.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @jfy133\n- @JoseEspinosa\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#createpanelrefs` channel](https://nfcore.slack.com/channels/createpanelrefs) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven 
Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { @@ -99,7 +99,7 @@ }, "mentions": [ { - "@id": "#25edcf53-2601-4c64-89af-31abf27707b1" + "@id": "#ffc13dc0-c803-4672-b625-937ec59f8d97" } ], "name": "nf-core/createpanelrefs" @@ -126,8 +126,16 @@ "SoftwareSourceCode", "ComputationalWorkflow" ], + "creator": [ + { + "@id": "https://orcid.org/0000-0001-7313-3734" + }, + { + "@id": "#max.u.garcia@gmail.com" + } + ], "dateCreated": "", - "dateModified": "2025-07-10T12:35:56Z", + "dateModified": "2025-09-23T11:50:54Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", @@ -136,6 +144,14 @@ "license": [ "MIT" ], + "maintainer": [ + { + "@id": "https://orcid.org/0000-0001-7313-3734" + }, + { + "@id": "#max.u.garcia@gmail.com" + } + ], "name": [ "nf-core/createpanelrefs" ], @@ -166,11 +182,11 @@ "version": "!>=24.10.5" }, { - "@id": "#25edcf53-2601-4c64-89af-31abf27707b1", + "@id": "#ffc13dc0-c803-4672-b625-937ec59f8d97", "@type": "TestSuite", "instance": [ { - "@id": "#f227dd1a-5f96-4dfa-acc7-a7a4c5061848" + "@id": "#edc7e61f-cf5b-463d-900f-d4f7d5330fb1" } ], "mainEntity": { @@ -179,7 +195,7 @@ "name": "Test suite for nf-core/createpanelrefs" }, { - "@id": "#f227dd1a-5f96-4dfa-acc7-a7a4c5061848", + "@id": "#edc7e61f-cf5b-463d-900f-d4f7d5330fb1", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/createpanelrefs", "resource": "repos/nf-core/createpanelrefs/actions/workflows/nf-test.yml", @@ -306,6 +322,18 @@ "@type": "Organization", "name": "nf-core", "url": "https://nf-co.re/" + }, + { + "@id": "https://orcid.org/0000-0001-7313-3734", + "@type": "Person", + "email": "20065894+ramprasadn@users.noreply.github.com", + "name": "Ramprasad Neethiraj" + }, + { + "@id": "#max.u.garcia@gmail.com", + "@type": "Person", + "email": "max.u.garcia@gmail.com", + "name": "Maxime U Garcia" } ] } \ No newline 
at end of file From 83641ddeb6e8c6bcef1b7c2812231d6ac06f4dde Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 16 Oct 2025 13:37:56 +0000 Subject: [PATCH 218/234] Template update for nf-core/tools version 3.4.1 --- .devcontainer/devcontainer.json | 28 ++++---- .devcontainer/setup.sh | 13 ++++ .github/actions/nf-test/action.yml | 6 +- .github/workflows/awsfulltest.yml | 12 ++-- .github/workflows/awstest.yml | 12 ++-- .github/workflows/clean-up.yml | 2 +- .github/workflows/download_pipeline.yml | 6 +- .github/workflows/fix_linting.yml | 16 ++--- .github/workflows/linting.yml | 14 ++-- .github/workflows/linting_comment.yml | 2 +- .github/workflows/nf-test.yml | 9 +-- .github/workflows/release-announcements.yml | 7 ++ .../workflows/template-version-comment.yml | 2 +- .gitpod.yml | 10 --- .nf-core.yml | 6 +- .pre-commit-config.yaml | 2 +- .prettierignore | 1 + README.md | 5 +- docs/usage.md | 2 +- main.nf | 5 +- modules.json | 8 +-- modules/nf-core/multiqc/environment.yml | 2 +- modules/nf-core/multiqc/main.nf | 4 +- .../nf-core/multiqc/tests/main.nf.test.snap | 18 +++--- modules/nf-core/multiqc/tests/tags.yml | 2 - nextflow.config | 64 ++++++------------- nextflow_schema.json | 12 ++++ ro-crate-metadata.json | 16 ++--- .../main.nf | 31 ++++++++- .../utils_nextflow_pipeline/tests/tags.yml | 2 - .../utils_nfcore_pipeline/tests/tags.yml | 2 - .../nf-core/utils_nfschema_plugin/main.nf | 40 ++++++++++-- .../utils_nfschema_plugin/tests/main.nf.test | 56 ++++++++++++++++ .../tests/nextflow.config | 4 +- tests/.nftignore | 3 +- tests/default.nf.test | 2 - 36 files changed, 268 insertions(+), 158 deletions(-) create mode 100755 .devcontainer/setup.sh delete mode 100644 .gitpod.yml delete mode 100644 modules/nf-core/multiqc/tests/tags.yml delete mode 100644 subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml delete mode 100644 subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 
b290e09..97c8c97 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,20 +1,20 @@ { "name": "nfcore", - "image": "nfcore/gitpod:latest", - "remoteUser": "gitpod", - "runArgs": ["--privileged"], + "image": "nfcore/devcontainer:latest", - // Configure tool-specific properties. - "customizations": { - // Configure properties specific to VS Code. - "vscode": { - // Set *default* container specific settings.json values on container create. - "settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python" - }, + "remoteUser": "root", + "privileged": true, - // Add the IDs of extensions you want installed when the container is created. - "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] - } + "remoteEnv": { + // Workspace path on the host for mounting with docker-outside-of-docker + "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" + }, + + "onCreateCommand": "./.devcontainer/setup.sh", + + "hostRequirements": { + "cpus": 4, + "memory": "16gb", + "storage": "32gb" } } diff --git a/.devcontainer/setup.sh b/.devcontainer/setup.sh new file mode 100755 index 0000000..5b9444b --- /dev/null +++ b/.devcontainer/setup.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# Customise the terminal command prompt +echo "export PROMPT_DIRTRIM=2" >> $HOME/.bashrc +echo "export PS1='\[\e[3;36m\]\w ->\[\e[0m\\] '" >> $HOME/.bashrc +export PROMPT_DIRTRIM=2 +export PS1='\[\e[3;36m\]\w ->\[\e[0m\\] ' + +# Update Nextflow +nextflow self-update + +# Update welcome message +echo "Welcome to the nf-core/createpanelrefs devcontainer!" 
> /usr/local/etc/vscode-dev-containers/first-run-notice.txt diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml index bf44d96..3b9724c 100644 --- a/.github/actions/nf-test/action.yml +++ b/.github/actions/nf-test/action.yml @@ -25,9 +25,9 @@ runs: version: "${{ env.NXF_VERSION }}" - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" - name: Install nf-test uses: nf-core/setup-nf-test@v1 @@ -52,6 +52,8 @@ runs: with: auto-update-conda: true conda-solver: libmamba + channels: conda-forge + channel-priority: strict conda-remove-defaults: true - name: Run nf-test diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 16879f1..edd7906 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -28,15 +28,15 @@ jobs: # Add full size test data (but still relatively small datasets for few samples) # on the `test_full.config` test runs with only one set of parameters with: - workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + workspace_id: ${{ vars.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + compute_env: ${{ vars.TOWER_COMPUTE_ENV }} revision: ${{ steps.revision.outputs.revision }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/createpanelrefs/work-${{ steps.revision.outputs.revision }} + workdir: s3://${{ vars.AWS_S3_BUCKET }}/work/createpanelrefs/work-${{ steps.revision.outputs.revision }} parameters: | { "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/createpanelrefs/results-${{ steps.revision.outputs.revision }}" + "outdir": "s3://${{ vars.AWS_S3_BUCKET }}/createpanelrefs/results-${{ steps.revision.outputs.revision }}" } profiles: test_full @@ -44,5 +44,5 @@ jobs: 
with: name: Seqera Platform debug log file path: | - seqera_platform_action_*.log - seqera_platform_action_*.json + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 7c2c028..e2e0879 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -14,14 +14,14 @@ jobs: - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 with: - workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + workspace_id: ${{ vars.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + compute_env: ${{ vars.TOWER_COMPUTE_ENV }} revision: ${{ github.sha }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/createpanelrefs/work-${{ github.sha }} + workdir: s3://${{ vars.AWS_S3_BUCKET }}/work/createpanelrefs/work-${{ github.sha }} parameters: | { - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/createpanelrefs/results-test-${{ github.sha }}" + "outdir": "s3://${{ vars.AWS_S3_BUCKET }}/createpanelrefs/results-test-${{ github.sha }}" } profiles: test @@ -29,5 +29,5 @@ jobs: with: name: Seqera Platform debug log file path: | - seqera_platform_action_*.log - seqera_platform_action_*.json + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index ac030fd..6adb0ff 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9 + - uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. 
Remove stale label or add a comment if it is still useful." diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 999bcc3..6d94bcb 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -44,9 +44,9 @@ jobs: - name: Disk space cleanup uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" architecture: "x64" - name: Setup Apptainer @@ -57,7 +57,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install git+https://github.com/nf-core/tools.git@dev + pip install git+https://github.com/nf-core/tools.git - name: Make a cache directory for the container images run: | diff --git a/.github/workflows/fix_linting.yml b/.github/workflows/fix_linting.yml index da9b1a4..321d648 100644 --- a/.github/workflows/fix_linting.yml +++ b/.github/workflows/fix_linting.yml @@ -13,13 +13,13 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 with: token: ${{ secrets.nf_core_bot_auth_token }} # indication that the linting is being fixed - name: React on comment - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: comment-id: ${{ github.event.comment.id }} reactions: eyes @@ -32,9 +32,9 @@ jobs: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} # Install and run pre-commit - - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - uses: 
actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" - name: Install pre-commit run: pip install pre-commit @@ -47,7 +47,7 @@ jobs: # indication that the linting has finished - name: react if linting finished succesfully if: steps.pre-commit.outcome == 'success' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: comment-id: ${{ github.event.comment.id }} reactions: "+1" @@ -67,21 +67,21 @@ jobs: - name: react if linting errors were fixed id: react-if-fixed if: steps.commit-and-push.outcome == 'success' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: comment-id: ${{ github.event.comment.id }} reactions: hooray - name: react if linting errors were not fixed if: steps.commit-and-push.outcome == 'failure' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: comment-id: ${{ github.event.comment.id }} reactions: confused - name: react if linting errors were not fixed if: steps.commit-and-push.outcome == 'failure' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + uses: peter-evans/create-or-update-comment@e8674b075228eee787fea43ef493e45ece1004c9 # v5 with: issue-number: ${{ github.event.issue.number }} body: | diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8b0f88c..30e6602 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,12 +11,12 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: 
actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 - - name: Set up Python 3.13 - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - name: Set up Python 3.14 + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" - name: Install pre-commit run: pip install pre-commit @@ -28,14 +28,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 - name: Install Nextflow uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" architecture: "x64" - name: read .nf-core.yml diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index d43797d..e6e9bc2 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2 + uses: marocchino/sticky-pull-request-comment@773744901bac0e8cbb5a0dc842800d45e9b2b405 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index e7b5844..e20bf6d 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -18,7 +18,7 @@ concurrency: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - NFT_VER: "0.9.2" + NFT_VER: "0.9.3" NFT_WORKDIR: "~" NXF_ANSI_LOG: false NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity @@ -40,7 +40,7 @@ jobs: rm -rf ./* || true rm -rf ./.??* || true ls 
-la ./ - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 with: fetch-depth: 0 @@ -78,14 +78,14 @@ jobs: - isMain: false profile: "singularity" NXF_VER: - - "24.10.5" + - "25.04.0" - "latest-everything" env: NXF_ANSI_LOG: false TOTAL_SHARDS: ${{ needs.nf-test-changes.outputs.total_shards }} steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 with: fetch-depth: 0 @@ -95,6 +95,7 @@ jobs: continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} env: NFT_WORKDIR: ${{ env.NFT_WORKDIR }} + NXF_VERSION: ${{ matrix.NXF_VER }} with: profile: ${{ matrix.profile }} shard: ${{ matrix.shard }} diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index 0f73249..e64cebd 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -14,6 +14,11 @@ jobs: run: | echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" | sed 's/-//g' >> $GITHUB_OUTPUT + - name: get description + id: get_topics + run: | + echo "description=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .description' >> $GITHUB_OUTPUT + - uses: rzr/fediverse-action@master with: access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} @@ -23,6 +28,8 @@ jobs: message: | Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ ${{ steps.get_topics.outputs.description }} + Please see the changelog: ${{ github.event.release.html_url }} ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics diff --git a/.github/workflows/template-version-comment.yml b/.github/workflows/template-version-comment.yml index beb5c77..c5988af 100644 --- a/.github/workflows/template-version-comment.yml +++ b/.github/workflows/template-version-comment.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 with: ref: ${{ github.event.pull_request.head.sha }} diff --git a/.gitpod.yml b/.gitpod.yml deleted file mode 100644 index 83599f6..0000000 --- a/.gitpod.yml +++ /dev/null @@ -1,10 +0,0 @@ -image: nfcore/gitpod:latest -tasks: - - name: Update Nextflow and setup pre-commit - command: | - pre-commit install --install-hooks - nextflow self-update - -vscode: - extensions: - - nf-core.nf-core-extensionpack # https://github.com/nf-core/vscode-extensionpack diff --git a/.nf-core.yml b/.nf-core.yml index c72ef97..5d27ee6 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,12 +1,10 @@ lint: files_exist: - - .github/workflows/ci.yml - conf/modules.config files_unchanged: - - .gitattributes - - .gitignore + - .github/PULL_REQUEST_TEMPLATE.md modules_config: false -nf_core_version: 3.3.2 +nf_core_version: 3.4.1 repository_type: pipeline template: author: "@maxulysse" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bb41bee..d06777a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: additional_dependencies: - prettier@3.6.2 - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: trailing-whitespace args: [--markdown-linebreak-ext=md] diff --git a/.prettierignore b/.prettierignore index edd29f0..2255e3e 100644 --- 
a/.prettierignore +++ b/.prettierignore @@ -10,4 +10,5 @@ testing/ testing* *.pyc bin/ +.nf-test/ ro-crate-metadata.json diff --git a/README.md b/README.md index e51251f..1aa4ea3 100644 --- a/README.md +++ b/README.md @@ -5,12 +5,13 @@ +[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new/nf-core/createpanelrefs) [![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml) [![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) -[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) -[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2) +[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) +[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.4.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.4.1) [![run with 
conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) diff --git a/docs/usage.md b/docs/usage.md index c548b62..7456e98 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -149,7 +149,7 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - `shifter` - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - `charliecloud` - - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) + - A generic configuration profile to be used with [Charliecloud](https://charliecloud.io/) - `apptainer` - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `wave` diff --git a/main.nf b/main.nf index 5a261d3..6571161 100644 --- a/main.nf +++ b/main.nf @@ -74,7 +74,10 @@ workflow { params.monochrome_logs, args, params.outdir, - params.input + params.input, + params.help, + params.help_full, + params.show_hidden ) // diff --git a/modules.json b/modules.json index 3d174e2..775df4d 100644 --- a/modules.json +++ b/modules.json @@ -12,7 +12,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "git_sha": "e10b76ca0c66213581bec2833e30d31f239dec0b", "installed_by": ["modules"] } } @@ -21,17 +21,17 @@ "nf-core": { "utils_nextflow_pipeline": { "branch": "master", - "git_sha": "c2b22d85f30a706a3073387f30380704fcae013b", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "51ae5406a030d4da1e49e4dab49756844fdd6c7a", + "git_sha": 
"05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", - "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", + "git_sha": "4b406a74dc0449c0401ed87d5bfff4252fd277fd", "installed_by": ["subworkflows"] } } diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index 812fc4c..dd513cb 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -4,4 +4,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::multiqc=1.29 + - bioconda::multiqc=1.31 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 0ac3c36..5288f5c 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.29--pyhdfd78af_0' : - 'biocontainers/multiqc:1.29--pyhdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ef/eff0eafe78d5f3b65a6639265a16b89fdca88d06d18894f90fcdb50142004329/data' : + 'community.wave.seqera.io/library/multiqc:1.31--1efbafd542a23882' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index 88e9057..17881d1 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,c1fe644a37468f6dae548d98bc72c2c1" + "versions.yml:md5,8968b114a3e20756d8af2b80713bcc4f" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.04.2" + "nextflow": "25.04.6" }, - "timestamp": "2025-05-22T11:50:41.182332996" + "timestamp": "2025-09-08T20:57:36.139055243" }, "multiqc_stub": { "content": [ @@ 
-17,25 +17,25 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,c1fe644a37468f6dae548d98bc72c2c1" + "versions.yml:md5,8968b114a3e20756d8af2b80713bcc4f" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.04.2" + "nextflow": "25.04.6" }, - "timestamp": "2025-05-22T11:51:22.448739369" + "timestamp": "2025-09-08T20:59:15.142230631" }, "multiqc_versions_config": { "content": [ [ - "versions.yml:md5,c1fe644a37468f6dae548d98bc72c2c1" + "versions.yml:md5,8968b114a3e20756d8af2b80713bcc4f" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.04.2" + "nextflow": "25.04.6" }, - "timestamp": "2025-05-22T11:51:06.198928424" + "timestamp": "2025-09-08T20:58:29.629087066" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml deleted file mode 100644 index bea6c0d..0000000 --- a/modules/nf-core/multiqc/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -multiqc: - - modules/nf-core/multiqc/** diff --git a/nextflow.config b/nextflow.config index cf6ffa7..7f20077 100644 --- a/nextflow.config +++ b/nextflow.config @@ -32,13 +32,15 @@ params { email_on_fail = null plaintext_email = false monochrome_logs = false - hook_url = null + hook_url = System.getenv('HOOK_URL') help = false help_full = false show_hidden = false version = false pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' - trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')// Config options + trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + + // Config options config_profile_name = null config_profile_description = null @@ -91,7 +93,18 @@ profiles { apptainer.enabled = false docker.runOptions = '-u $(id -u):$(id -g)' } - arm { + arm64 { + process.arch = 'arm64' + // TODO https://github.com/nf-core/modules/issues/6694 + // For now if you're using arm64 you have to use wave for the sake of the maintainers + // wave profile + 
apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' + } + emulate_amd64 { docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { @@ -148,18 +161,6 @@ profiles { wave.freeze = true wave.strategy = 'conda,container' } - gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB - process { - resourceLimits = [ - memory: 8.GB, - cpus : 4, - time : 1.h - ] - } - } gpu { docker.runOptions = '-u $(id -u):$(id -g) --gpus all' apptainer.runOptions = '--nv' @@ -169,6 +170,8 @@ profiles { test_full { includeConfig 'conf/test_full.config' } } +// Set AWS client to anonymous when using the default igenomes_base +aws.client.anonymous = !params.igenomes_ignore && params.igenomes_base?.startsWith('s3://ngi-igenomes/igenomes/') ?: false // Load nf-core custom profiles from different institutions // If params.custom_config_base is set AND either the NXF_OFFLINE environment variable is not set or params.custom_config_base is a local path, the nfcore_custom.config file from the specified base path is included. 
@@ -250,46 +253,19 @@ manifest { description = """Generate Panel of Normals, models or other similar references from lots of samples""" mainScript = 'main.nf' defaultBranch = 'main' - nextflowVersion = '!>=24.10.5' + nextflowVersion = '!>=25.04.0' version = '1.0.0' doi = '' } // Nextflow plugins plugins { - id 'nf-schema@2.4.2' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.5.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet } validation { defaultIgnoreParams = ["genomes"] monochromeLogs = params.monochrome_logs - help { - enabled = true - command = "nextflow run nf-core/createpanelrefs -profile --input samplesheet.csv --outdir " - fullParameter = "help_full" - showHiddenParameter = "show_hidden" - beforeText = """ --\033[2m----------------------------------------------------\033[0m- - \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m -\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m -\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m -\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m - \033[0;32m`._,._,\'\033[0m -\033[0;35m nf-core/createpanelrefs ${manifest.version}\033[0m --\033[2m----------------------------------------------------\033[0m- -""" - afterText = """${manifest.doi ? "\n* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? 
"\n" : ""} -* The nf-core framework - https://doi.org/10.1038/s41587-020-0439-x - -* Software dependencies - https://github.com/nf-core/createpanelrefs/blob/main/CITATIONS.md -""" - } - summary { - beforeText = validation.help.beforeText - afterText = validation.help.afterText - } } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index 9fbd822..58c70ee 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -224,6 +224,18 @@ "fa_icon": "far calendar", "description": "Suffix to add to the trace report filename. Default is the date and time in the format yyyy-MM-dd_HH-mm-ss.", "hidden": true + }, + "help": { + "type": ["boolean", "string"], + "description": "Display the help message." + }, + "help_full": { + "type": "boolean", + "description": "Display the full detailed help message." + }, + "show_hidden": { + "type": "boolean", + "description": "Display hidden parameters in the help message (only works when --help or --help_full are provided)." } } } diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 38ac151..a12f519 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -22,8 +22,8 @@ "@id": "./", "@type": "Dataset", "creativeWorkStatus": "Stable", - "datePublished": "2025-07-08T11:38:16+00:00", - "description": "

\n \n \n \"nf-core/createpanelrefs\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/createpanelrefs)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/createpanelrefs** is a bioinformatics pipeline that ...\n\n\n\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\n\nNow, you can run the pipeline using:\n\n\n\n```bash\nnextflow run nf-core/createpanelrefs \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/createpanelrefs/usage) and the [parameter documentation](https://nf-co.re/createpanelrefs/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/createpanelrefs/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/createpanelrefs/output).\n\n## Credits\n\nnf-core/createpanelrefs was originally written by @maxulysse.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#createpanelrefs` channel](https://nfcore.slack.com/channels/createpanelrefs) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "datePublished": "2025-10-16T13:37:52+00:00", + "description": "

\n \n \n \"nf-core/createpanelrefs\"\n \n

\n\n[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new/nf-core/createpanelrefs)\n[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.4.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.4.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/createpanelrefs)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/createpanelrefs** is a bioinformatics pipeline that ...\n\n\n\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\n\nNow, you can run the pipeline using:\n\n\n\n```bash\nnextflow run nf-core/createpanelrefs \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/createpanelrefs/usage) and the [parameter documentation](https://nf-co.re/createpanelrefs/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/createpanelrefs/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/createpanelrefs/output).\n\n## Credits\n\nnf-core/createpanelrefs was originally written by @maxulysse.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#createpanelrefs` channel](https://nfcore.slack.com/channels/createpanelrefs) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -99,7 +99,7 @@ }, "mentions": [ { - "@id": "#c92fc7bd-512a-4c0c-8702-9b0f88a36652" + "@id": "#cd8e2e98-9caa-47f7-acad-0ff49cc4a459" } ], "name": "nf-core/createpanelrefs" @@ -123,7 +123,7 @@ "@id": "main.nf", "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], "dateCreated": "", - "dateModified": "2025-07-08T11:38:16Z", + "dateModified": "2025-10-16T13:37:52Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": ["nf-core", "nextflow"], "license": ["MIT"], @@ -147,14 +147,14 @@ "url": { "@id": "https://www.nextflow.io/" }, - "version": "!>=24.10.5" + "version": "!>=25.04.0" }, { - "@id": "#c92fc7bd-512a-4c0c-8702-9b0f88a36652", + "@id": "#cd8e2e98-9caa-47f7-acad-0ff49cc4a459", "@type": "TestSuite", "instance": [ { - "@id": "#9cfceb7e-0134-4fcf-a98b-804534d859ad" + "@id": "#83ab708a-c1b8-4ed6-bb52-d96894e3fc34" } ], "mainEntity": { @@ -163,7 +163,7 @@ "name": "Test suite for nf-core/createpanelrefs" }, { - "@id": "#9cfceb7e-0134-4fcf-a98b-804534d859ad", + "@id": "#83ab708a-c1b8-4ed6-bb52-d96894e3fc34", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/createpanelrefs", "resource": "repos/nf-core/createpanelrefs/actions/workflows/nf-test.yml", diff --git a/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf b/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf index 90719c0..1cc2c38 100644 --- a/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf @@ -11,6 +11,7 @@ include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' include { paramsSummaryMap } from 'plugin/nf-schema' include { samplesheetToList } from 'plugin/nf-schema' +include { paramsHelp } from 'plugin/nf-schema' include { completionEmail } from 
'../../nf-core/utils_nfcore_pipeline' include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' include { imNotification } from '../../nf-core/utils_nfcore_pipeline' @@ -32,6 +33,9 @@ workflow PIPELINE_INITIALISATION { nextflow_cli_args // array: List of positional nextflow CLI args outdir // string: The output directory where the results will be saved input // string: Path to input samplesheet + help // boolean: Display help message and exit + help_full // boolean: Show the full help message + show_hidden // boolean: Show hidden parameters in the help message main: @@ -50,10 +54,35 @@ workflow PIPELINE_INITIALISATION { // // Validate parameters and generate parameter summary to stdout // + before_text = """ +-\033[2m----------------------------------------------------\033[0m- + \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m +\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m +\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m +\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m + \033[0;32m`._,._,\'\033[0m +\033[0;35m nf-core/createpanelrefs ${workflow.manifest.version}\033[0m +-\033[2m----------------------------------------------------\033[0m- +""" + after_text = """${workflow.manifest.doi ? "\n* The pipeline\n" : ""}${workflow.manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${workflow.manifest.doi ? 
"\n" : ""} +* The nf-core framework + https://doi.org/10.1038/s41587-020-0439-x + +* Software dependencies + https://github.com/nf-core/createpanelrefs/blob/main/CITATIONS.md +""" + command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFSCHEMA_PLUGIN ( workflow, validate_params, - null + null, + help, + help_full, + show_hidden, + before_text, + after_text, + command ) // diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml deleted file mode 100644 index f847611..0000000 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/utils_nextflow_pipeline: - - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml deleted file mode 100644 index ac8523c..0000000 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/utils_nfcore_pipeline: - - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf index 4994303..ee4738c 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/main.nf +++ b/subworkflows/nf-core/utils_nfschema_plugin/main.nf @@ -4,6 +4,7 @@ include { paramsSummaryLog } from 'plugin/nf-schema' include { validateParameters } from 'plugin/nf-schema' +include { paramsHelp } from 'plugin/nf-schema' workflow UTILS_NFSCHEMA_PLUGIN { @@ -15,29 +16,56 @@ workflow UTILS_NFSCHEMA_PLUGIN { // when this input is empty it will automatically use the configured schema or // "${projectDir}/nextflow_schema.json" as default. 
This input should not be empty // for meta pipelines + help // boolean: show help message + help_full // boolean: show full help message + show_hidden // boolean: show hidden parameters in help message + before_text // string: text to show before the help message and parameters summary + after_text // string: text to show after the help message and parameters summary + command // string: an example command of the pipeline main: + if(help || help_full) { + help_options = [ + beforeText: before_text, + afterText: after_text, + command: command, + showHidden: show_hidden, + fullHelp: help_full, + ] + if(parameters_schema) { + help_options << [parametersSchema: parameters_schema] + } + log.info paramsHelp( + help_options, + params.help instanceof String ? params.help : "", + ) + exit 0 + } + // // Print parameter summary to stdout. This will display the parameters // that differ from the default given in the JSON schema // + + summary_options = [:] if(parameters_schema) { - log.info paramsSummaryLog(input_workflow, parameters_schema:parameters_schema) - } else { - log.info paramsSummaryLog(input_workflow) + summary_options << [parametersSchema: parameters_schema] } + log.info before_text + log.info paramsSummaryLog(summary_options, input_workflow) + log.info after_text // // Validate the parameters using nextflow_schema.json or the schema // given via the validation.parametersSchema configuration option // if(validate_params) { + validateOptions = [:] if(parameters_schema) { - validateParameters(parameters_schema:parameters_schema) - } else { - validateParameters() + validateOptions << [parametersSchema: parameters_schema] } + validateParameters(validateOptions) } emit: diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test index 8fb3016..c977917 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test 
@@ -25,6 +25,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -51,6 +57,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -77,6 +89,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -103,6 +121,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -114,4 +138,36 @@ nextflow_workflow { ) } } + + test("Should create a help message") { + + when { + + params { + test_data = '' + outdir = null + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + input[3] = true + input[4] = false + input[5] = false + input[6] = "Before" + input[7] = "After" + input[8] = "nextflow run test/test" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } } diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config index 09ef842..8d8c737 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config @@ -1,8 +1,8 @@ plugins { - id "nf-schema@2.4.2" + id "nf-schema@2.5.1" } validation { 
parametersSchema = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" monochromeLogs = true -} \ No newline at end of file +} diff --git a/tests/.nftignore b/tests/.nftignore index 158c83c..e128a12 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -1,10 +1,11 @@ .DS_Store multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt -multiqc/multiqc_data/BETA-multiqc.parquet +multiqc/multiqc_data/multiqc.parquet multiqc/multiqc_data/multiqc.log multiqc/multiqc_data/multiqc_data.json multiqc/multiqc_data/multiqc_sources.txt multiqc/multiqc_data/multiqc_software_versions.txt +multiqc/multiqc_data/llms-full.txt multiqc/multiqc_plots/{svg,pdf,png}/*.{svg,pdf,png} multiqc/multiqc_report.html fastqc/*_fastqc.{html,zip} diff --git a/tests/default.nf.test b/tests/default.nf.test index b8bd0fd..6cee6eb 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -20,8 +20,6 @@ nextflow_pipeline { assertAll( { assert workflow.success}, { assert snapshot( - // Number of successful tasks - workflow.trace.succeeded().size(), // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions removeNextflowVersion("$outputDir/pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml"), // All stable path name, with a relative path From 2068f866103ae7feb5822a9829cb7f79d0162ad9 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 31 Oct 2025 12:48:05 +0100 Subject: [PATCH 219/234] TEMPLATE --- .github/PULL_REQUEST_TEMPLATE.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 4d074d2..af974b5 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -8,14 +8,14 @@ These are the most common things requested on pull requests (PRs). Remember that PRs should be made against the dev branch, unless you're preparing a pipeline release. 
-Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/createpanelrefs/tree/main/.github/CONTRIBUTING.md) +Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/createpanelrefs/tree/master/.github/CONTRIBUTING.md) --> ## PR checklist - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! -- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/createpanelrefs/tree/main/.github/CONTRIBUTING.md) +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/createpanelrefs/tree/master/.github/CONTRIBUTING.md) - [ ] If necessary, also make a PR on the nf-core/createpanelrefs _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core pipelines lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). 
From 4dd25c1412b09b95adbcd9ea6aa90968f9436605 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 31 Oct 2025 12:49:43 +0100 Subject: [PATCH 220/234] pre-commit --- modules/nf-core/gatk4/mutect2/meta.yml | 1 - subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml | 1 - tests/.nftignore | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/nf-core/gatk4/mutect2/meta.yml b/modules/nf-core/gatk4/mutect2/meta.yml index e3b0835..64029f2 100644 --- a/modules/nf-core/gatk4/mutect2/meta.yml +++ b/modules/nf-core/gatk4/mutect2/meta.yml @@ -159,4 +159,3 @@ authors: maintainers: - "@GCJMackenzie" - "@ramprasadn" - diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml b/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml index ed1ba59..2660836 100644 --- a/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml @@ -67,4 +67,3 @@ authors: - "@GCJMackenzie" maintainers: - "@GCJMackenzie" - \ No newline at end of file diff --git a/tests/.nftignore b/tests/.nftignore index 30eafed..a3d1717 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -42,4 +42,4 @@ multiqc/multiqc_plots/{svg,pdf,png}/*.{svg,pdf,png} multiqc/multiqc_report.html pipeline_info/*.{html,json,txt,yml} references/genome.dict -references/intervals/gens_pon/genome.interval_list \ No newline at end of file +references/intervals/gens_pon/genome.interval_list From b70f13bbd3058665dee8f3713c2a405c933c3b52 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 31 Oct 2025 13:43:15 +0100 Subject: [PATCH 221/234] fix linting --- modules.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules.json b/modules.json index 51742a4..e26a28b 100644 --- a/modules.json +++ b/modules.json @@ -72,7 +72,7 @@ }, "gatk4/mutect2": { "branch": "master", - "git_sha": "3e3ffe7f2a3bd3d9140619c74bfe2316194311e1", + "git_sha": "59a5cb7332c37182afa246a2ffdbdeb7cc4e6b75", "installed_by": ["bam_create_som_pon_gatk", "modules"] 
}, "gatk4/preprocessintervals": { @@ -111,7 +111,7 @@ "nf-core": { "bam_create_som_pon_gatk": { "branch": "master", - "git_sha": "3e3ffe7f2a3bd3d9140619c74bfe2316194311e1", + "git_sha": "59a5cb7332c37182afa246a2ffdbdeb7cc4e6b75", "installed_by": ["subworkflows"] }, "utils_nextflow_pipeline": { From 211e2940ab64769f509e2dc475ea7666817f1751 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 31 Oct 2025 13:52:24 +0100 Subject: [PATCH 222/234] fix toots release --- .github/workflows/release-announcements.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index e64cebd..039f5e8 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -15,9 +15,9 @@ jobs: echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" | sed 's/-//g' >> $GITHUB_OUTPUT - name: get description - id: get_topics + id: get_description run: | - echo "description=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .description' >> $GITHUB_OUTPUT + echo "description=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .description')" >> $GITHUB_OUTPUT - uses: rzr/fediverse-action@master with: @@ -28,7 +28,7 @@ jobs: message: | Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
- ${{ steps.get_topics.outputs.description }} + ${{ steps.get_description.outputs.description }} Please see the changelog: ${{ github.event.release.html_url }} From 8d0d07e94cffc41ec4de2af54bf347b30760e442 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 31 Oct 2025 14:18:59 +0100 Subject: [PATCH 223/234] LS --- CHANGELOG.md | 2 +- main.nf | 1 - workflows/createpanelrefs.nf | 36 ++++++++++++++++++------------------ 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 174f12c..c7bb898 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,7 +53,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n | cnvkit | | 0.9.10 | | gatk4 | | 4.6.1.0 | | gawk | | 5.3.0 | -| multiqc | | 1.30 | +| multiqc | | 1.32 | | samtools | | 1.21 | ### `Deprecated` diff --git a/main.nf b/main.nf index c14cc9c..0ba1739 100644 --- a/main.nf +++ b/main.nf @@ -125,7 +125,6 @@ workflow { gens_interval_list = PREPARE_GENOME.out.gens_interval_list mutect2_target_bed = PREPARE_GENOME.out.mutect2_target_bed - multiqc_config = channel.fromPath("${projectDir}/assets/multiqc_config.yml", checkIfExists: true) multiqc_custom_config = params.multiqc_config ? channel.fromPath(params.multiqc_config, checkIfExists: true) : channel.empty() multiqc_logo = params.multiqc_logo ? 
channel.fromPath(params.multiqc_logo, checkIfExists: true) : channel.empty() diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 50e3338..a534e83 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -12,27 +12,27 @@ include { SAMTOOLS_VIEW } from '../modules/nf-core/samtools/view' workflow CREATEPANELREFS { take: - samplesheet // channel: samplesheet read in from --input - tools // array: tools to run, or no_tools if none (it's actually comma separated values string, but close enough) - gcnv_model_name // string: name of gcnv model - gens_pon_name // string: name of gens pon - mutect2_pon_name // string: name of mutect2 pon - fasta // channel: [meta, fasta] - dict // channel: [meta, dict] - fai // channel: [meta, fai] - cnvkit_targets // channel: [meta, cnvkit_targets] - gcnv_exclude_bed // channel: [meta, gcnv_exclude_bed] - gcnv_exclude_interval_list // channel: [meta, gcnv_exclude_interval_list] - gcnv_mappable_regions // channel: [meta, gcnv_mappable_regions] - gcnv_ploidy_priors // channel: [meta, gcnv_ploidy_priors] + samplesheet // channel: samplesheet read in from --input + tools // array: tools to run, or no_tools if none (it's actually comma separated values string, but close enough) + gcnv_model_name // string: name of gcnv model + gens_pon_name // string: name of gens pon + mutect2_pon_name // string: name of mutect2 pon + fasta // channel: [meta, fasta] + dict // channel: [meta, dict] + fai // channel: [meta, fai] + cnvkit_targets // channel: [meta, cnvkit_targets] + gcnv_exclude_bed // channel: [meta, gcnv_exclude_bed] + gcnv_exclude_interval_list // channel: [meta, gcnv_exclude_interval_list] + gcnv_mappable_regions // channel: [meta, gcnv_mappable_regions] + gcnv_ploidy_priors // channel: [meta, gcnv_ploidy_priors] gcnv_segmental_duplications // channel: [meta, gcnv_segmental_duplications] - gcnv_target_bed // channel: [meta, gcnv_target_bed] - gcnv_target_interval_list // channel: [meta, 
gcnv_target_interval_list] - gens_interval_list // channel: [meta, gens_interval_list] - mutect2_target_bed // channel: [meta, mutect2_target_bed] + gcnv_target_bed // channel: [meta, gcnv_target_bed] + gcnv_target_interval_list // channel: [meta, gcnv_target_interval_list] + gens_interval_list // channel: [meta, gens_interval_list] + mutect2_target_bed // channel: [meta, mutect2_target_bed] main: - versions = Channel.empty() + versions = channel.empty() if (tools.split(',').contains('cnvkit')) { From 0b9b322576a9000cb0b0e1e5ce8dfbc90df687ab Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 31 Oct 2025 14:20:06 +0100 Subject: [PATCH 224/234] update all modules --- modules.json | 32 +++--- modules/nf-core/cnvkit/batch/environment.yml | 6 +- modules/nf-core/cnvkit/batch/main.nf | 108 +++++++++++------- modules/nf-core/cnvkit/batch/meta.yml | 57 +++++---- .../gatk4/annotateintervals/environment.yml | 3 +- .../nf-core/gatk4/annotateintervals/main.nf | 19 +-- .../nf-core/gatk4/annotateintervals/meta.yml | 31 +++-- .../gatk4/bedtointervallist/environment.yml | 3 +- .../nf-core/gatk4/bedtointervallist/main.nf | 23 ++-- .../nf-core/gatk4/bedtointervallist/meta.yml | 24 ++-- .../gatk4/collectreadcounts/environment.yml | 3 +- .../nf-core/gatk4/collectreadcounts/main.nf | 49 ++++---- .../nf-core/gatk4/collectreadcounts/meta.yml | 29 +++-- .../environment.yml | 3 +- .../createreadcountpanelofnormals/main.nf | 23 ++-- .../createreadcountpanelofnormals/meta.yml | 19 +-- .../createsequencedictionary/environment.yml | 3 +- .../gatk4/createsequencedictionary/main.nf | 25 ++-- .../gatk4/createsequencedictionary/meta.yml | 19 +-- .../environment.yml | 3 +- .../determinegermlinecontigploidy/main.nf | 32 +++--- .../determinegermlinecontigploidy/meta.yml | 34 +++--- .../gatk4/filterintervals/environment.yml | 3 +- modules/nf-core/gatk4/filterintervals/main.nf | 31 ++--- .../nf-core/gatk4/filterintervals/meta.yml | 26 +++-- .../gatk4/germlinecnvcaller/environment.yml | 3 +- 
.../nf-core/gatk4/germlinecnvcaller/main.nf | 45 ++++---- .../nf-core/gatk4/germlinecnvcaller/meta.yml | 27 +++-- .../gatk4/indexfeaturefile/environment.yml | 3 +- .../nf-core/gatk4/indexfeaturefile/main.nf | 21 ++-- .../nf-core/gatk4/indexfeaturefile/meta.yml | 18 +-- .../gatk4/intervallisttools/environment.yml | 3 +- .../nf-core/gatk4/intervallisttools/main.nf | 21 ++-- .../nf-core/gatk4/intervallisttools/meta.yml | 22 ++-- .../gatk4/preprocessintervals/environment.yml | 3 +- .../nf-core/gatk4/preprocessintervals/main.nf | 31 ++--- .../gatk4/preprocessintervals/meta.yml | 26 +++-- modules/nf-core/gawk/meta.yml | 49 ++++---- .../nf-core/samtools/faidx/environment.yml | 6 +- modules/nf-core/samtools/faidx/main.nf | 4 +- modules/nf-core/samtools/faidx/meta.yml | 52 +++++---- .../nf-core/samtools/index/environment.yml | 6 +- modules/nf-core/samtools/index/main.nf | 6 +- modules/nf-core/samtools/index/meta.yml | 28 +++-- modules/nf-core/samtools/view/environment.yml | 5 +- modules/nf-core/samtools/view/main.nf | 5 +- modules/nf-core/samtools/view/meta.yml | 72 +++++++----- 47 files changed, 613 insertions(+), 451 deletions(-) diff --git a/modules.json b/modules.json index e26a28b..1983e31 100644 --- a/modules.json +++ b/modules.json @@ -7,32 +7,32 @@ "nf-core": { "cnvkit/batch": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "09223d6de1dab602242c4c57ab2a4599d460e528", "installed_by": ["modules"] }, "gatk4/annotateintervals": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", "installed_by": ["modules"] }, "gatk4/bedtointervallist": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", "installed_by": ["modules"] }, "gatk4/collectreadcounts": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": 
"ae8cd884f895585c6799ab4eb6a2c9f44df03336", "installed_by": ["modules"] }, "gatk4/createreadcountpanelofnormals": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", "installed_by": ["modules"] }, "gatk4/createsequencedictionary": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", "installed_by": ["modules"] }, "gatk4/createsomaticpanelofnormals": { @@ -42,12 +42,12 @@ }, "gatk4/determinegermlinecontigploidy": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", "installed_by": ["modules"] }, "gatk4/filterintervals": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", "installed_by": ["modules"] }, "gatk4/genomicsdbimport": { @@ -57,17 +57,17 @@ }, "gatk4/germlinecnvcaller": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", "installed_by": ["modules"] }, "gatk4/indexfeaturefile": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", "installed_by": ["modules"] }, "gatk4/intervallisttools": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", "installed_by": ["modules"] }, "gatk4/mutect2": { @@ -77,12 +77,12 @@ }, "gatk4/preprocessintervals": { "branch": "master", - "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", + "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", "installed_by": ["modules"] }, "gawk": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", "installed_by": ["modules"] }, "multiqc": { 
@@ -92,17 +92,17 @@ }, "samtools/faidx": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "c8be52dba1166c678e74cda9c3a3c221635c8bb1", "installed_by": ["modules"] }, "samtools/index": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "c8be52dba1166c678e74cda9c3a3c221635c8bb1", "installed_by": ["modules"] }, "samtools/view": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "c8be52dba1166c678e74cda9c3a3c221635c8bb1", "installed_by": ["modules"] } } diff --git a/modules/nf-core/cnvkit/batch/environment.yml b/modules/nf-core/cnvkit/batch/environment.yml index a2466da..76271e4 100644 --- a/modules/nf-core/cnvkit/batch/environment.yml +++ b/modules/nf-core/cnvkit/batch/environment.yml @@ -5,6 +5,6 @@ channels: - bioconda dependencies: - - bioconda::cnvkit=0.9.10 - - bioconda::htslib=1.17 - - bioconda::samtools=1.17 + - bioconda::cnvkit=0.9.12 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/cnvkit/batch/main.nf b/modules/nf-core/cnvkit/batch/main.nf index 9e8aafa..2a81901 100644 --- a/modules/nf-core/cnvkit/batch/main.nf +++ b/modules/nf-core/cnvkit/batch/main.nf @@ -1,11 +1,11 @@ process CNVKIT_BATCH { - tag "$meta.id" + tag "${meta.id}" label 'process_low' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:c94363856059151a2974dc501fb07a0360cc60a3-0' : - 'biocontainers/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:c94363856059151a2974dc501fb07a0360cc60a3-0' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/3e/3e8542cdb0190cfe2cedd74f714f021a2ffa94be3ec2a5b95ff52610cb3e2c34/data' + : 'community.wave.seqera.io/library/cnvkit_htslib_samtools:86928c121163aca7'}" input: tuple val(meta), path(tumor), path(normal) @@ -13,7 +13,7 @@ process CNVKIT_BATCH { tuple val(meta3), path(fasta_fai) tuple val(meta4), path(targets) tuple val(meta5), path(reference) - val panel_of_normals + val panel_of_normals output: tuple val(meta), path("*.bed"), emit: bed @@ -22,89 +22,117 @@ process CNVKIT_BATCH { tuple val(meta), path("*.cns"), emit: cns, optional: true tuple val(meta), path("*.pdf"), emit: pdf, optional: true tuple val(meta), path("*.png"), emit: png, optional: true - path "versions.yml" , emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when - script: def args = task.ext.args ?: '' - def tumor_exists = tumor ? true : false def normal_exists = normal ? true : false def reference_exists = reference ? true : false - - // execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow + // execute samtools only when cram files are input, cnvkit runs natively on cram but is prohibitively slow def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false def normal_cram = normal_exists && normal.Extension == "cram" ? true : false - def tumor_bam = tumor_exists && tumor.Extension == "bam" ? true : false - def normal_bam = normal_exists && normal.Extension == "bam" ? true : false def tumor_out = tumor_cram ? tumor.BaseName + ".bam" : "${tumor}" // tumor_only mode does not need fasta & target - // instead it requires a pre-computed reference.cnn which is built from fasta & target + // instead a pre-computed reference.cnn may be supplied which is built from fasta & target def (normal_out, normal_args, fasta_args) = ["", "", ""] def fai_reference = fasta_fai ? 
"--fai-reference ${fasta_fai}" : "" - if (normal_exists){ + if (normal_exists) { def normal_prefix = normal.BaseName normal_out = normal_cram ? "${normal_prefix}" + ".bam" : "${normal}" - fasta_args = fasta ? "--fasta $fasta" : "" + fasta_args = fasta ? "--fasta ${fasta}" : "" // germline mode // normal samples must be input without a flag // requires flag --normal to be empty [] - if(!tumor_exists){ + if (!tumor_exists) { tumor_out = "${normal_prefix}" + ".bam" normal_args = "--normal " } - // somatic mode else { - normal_args = normal_prefix ? "--normal $normal_out" : "" + normal_args = normal_prefix ? "--normal ${normal_out}" : "" } - if (reference_exists){ + if (reference_exists) { fasta_args = "" normal_args = "" } } - // generation of panel of normals def generate_pon = panel_of_normals ? true : false - if (generate_pon && !tumor_exists){ + if (generate_pon && !tumor_exists) { def pon_input = normal.join(' ') - normal_args = "--normal $pon_input" + normal_args = "--normal ${pon_input}" tumor_out = "" } - def target_args = targets && !reference_exists ? "--targets $targets" : "" - def reference_args = reference ? "--reference $reference" : "" + // tumor_only mode and no reference + // generate a "flat" reference which assumes equal coverage + // by passing '--normal' without any files + if (!reference_exists & !normal_exists & tumor_exists) { + normal_args = normal_args ?: "--normal" + } + + def target_args = targets && !reference_exists ? "--targets ${targets}" : "" + def reference_args = reference ? "--reference ${reference}" : "" def samtools_cram_convert = '' - samtools_cram_convert += normal_cram ? " samtools view -T $fasta $fai_reference $normal -@ $task.cpus -o $normal_out\n" : '' - samtools_cram_convert += normal_cram ? " samtools index $normal_out\n" : '' - samtools_cram_convert += tumor_cram ? " samtools view -T $fasta $fai_reference $tumor -@ $task.cpus -o $tumor_out\n" : '' - samtools_cram_convert += tumor_cram ? 
" samtools index $tumor_out\n" : '' - def versions = normal_cram || tumor_cram ? - "samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')\n cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g')" : - "cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g')" + samtools_cram_convert += normal_cram ? " samtools view -T ${fasta} ${fai_reference} ${normal} -@ ${task.cpus} -o ${normal_out}\n" : '' + samtools_cram_convert += normal_cram ? " samtools index ${normal_out}\n" : '' + samtools_cram_convert += tumor_cram ? " samtools view -T ${fasta} ${fai_reference} ${tumor} -@ ${task.cpus} -o ${tumor_out}\n" : '' + samtools_cram_convert += tumor_cram ? " samtools index ${tumor_out}\n" : '' + def versions = normal_cram || tumor_cram + ? "samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')\n cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g')" + : "cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g')" """ - $samtools_cram_convert - + ${samtools_cram_convert} cnvkit.py \\ batch \\ - $tumor_out \\ - $normal_args \\ - $fasta_args \\ - $reference_args \\ - $target_args \\ - --processes $task.cpus \\ - $args + ${tumor_out} \\ + ${normal_args} \\ + ${fasta_args} \\ + ${reference_args} \\ + ${target_args} \\ + --processes ${task.cpus} \\ + ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": ${versions} END_VERSIONS """ + stub: + def tumor_exists = tumor ? true : false + def normal_exists = normal ? true : false + def reference_exists = reference ? true : false + // identify BED naming pattern + def bed_prefix = reference_exists ? reference.BaseName : targets ? targets.BaseName : "" + def bed_suffix = reference_exists ? "-tmp.bed" : ".bed" + // execute samtools only when cram files are input, cnvkit runs natively on cram but is prohibitively slow + def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false + def normal_cram = normal_exists && normal.Extension == "cram" ? 
true : false + def out_base_name = tumor_exists ? tumor.BaseName : normal.BaseName + def versions = normal_cram || tumor_cram + ? "samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')\n cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g')" + : "cnvkit: \$(cnvkit.py version | sed -e 's/cnvkit v//g')" + """ + touch ${bed_prefix}.antitarget${bed_suffix} + touch ${bed_prefix}.target${bed_suffix} + touch "reference.cnn" + touch ${out_base_name}.antitargetcoverage.cnn + touch ${out_base_name}.bintest.cns + touch ${out_base_name}.call.cns + touch ${out_base_name}.cnr + touch ${out_base_name}.cns + touch ${out_base_name}.targetcoverage.cnn + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ${versions} + END_VERSIONS + """ } diff --git a/modules/nf-core/cnvkit/batch/meta.yml b/modules/nf-core/cnvkit/batch/meta.yml index 30f7a1a..d4ee78e 100644 --- a/modules/nf-core/cnvkit/batch/meta.yml +++ b/modules/nf-core/cnvkit/batch/meta.yml @@ -23,10 +23,12 @@ input: type: file description: | Input tumour sample bam file (or cram) + ontologies: [] - normal: type: file description: | Input normal sample bam file (or cram) + ontologies: [] - - meta2: type: map description: | @@ -36,6 +38,7 @@ input: type: file description: | Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided) + ontologies: [] - - meta3: type: map description: | @@ -45,6 +48,7 @@ input: type: file description: | Input reference genome fasta index (optional, but recommended for cram_input) + ontologies: [] - - meta4: type: map description: | @@ -54,6 +58,7 @@ input: type: file description: | Input target bed file + ontologies: [] - - meta5: type: map description: | @@ -63,13 +68,15 @@ input: type: file description: | Input reference cnn-file (only for germline and tumor-only running) - - - panel_of_normals: - type: file - description: | - Input panel of normals file + ontologies: [] + - panel_of_normals: + type: file + 
description: | + Input panel of normals file + ontologies: [] output: - - bed: - - meta: + bed: + - - meta: type: map description: | Groovy Map containing sample information @@ -78,8 +85,9 @@ output: type: file description: File containing genomic regions pattern: "*.{bed}" - - cnn: - - meta: + ontologies: [] + cnn: + - - meta: type: map description: | Groovy Map containing sample information @@ -88,8 +96,9 @@ output: type: file description: File containing coverage information pattern: "*.{cnn}" - - cnr: - - meta: + ontologies: [] + cnr: + - - meta: type: map description: | Groovy Map containing sample information @@ -98,8 +107,9 @@ output: type: file description: File containing copy number ratio information pattern: "*.{cnr}" - - cns: - - meta: + ontologies: [] + cns: + - - meta: type: map description: | Groovy Map containing sample information @@ -108,8 +118,9 @@ output: type: file description: File containing copy number segment information pattern: "*.{cns}" - - pdf: - - meta: + ontologies: [] + pdf: + - - meta: type: map description: | Groovy Map containing sample information @@ -118,8 +129,9 @@ output: type: file description: File with plot of copy numbers or segments on chromosomes pattern: "*.{pdf}" - - png: - - meta: + ontologies: [] + png: + - - meta: type: map description: | Groovy Map containing sample information @@ -128,11 +140,14 @@ output: type: file description: File with plot of bin-level log2 coverages and segmentation calls pattern: "*.{png}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@adamrtalbot" - "@drpatelh" diff --git a/modules/nf-core/gatk4/annotateintervals/environment.yml b/modules/nf-core/gatk4/annotateintervals/environment.yml index 
b562b72..67e0eb8 100644 --- a/modules/nf-core/gatk4/annotateintervals/environment.yml +++ b/modules/nf-core/gatk4/annotateintervals/environment.yml @@ -5,5 +5,6 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/gatk4 - - bioconda::gatk4=4.6.1.0 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/annotateintervals/main.nf b/modules/nf-core/gatk4/annotateintervals/main.nf index 1f02637..50556d5 100644 --- a/modules/nf-core/gatk4/annotateintervals/main.nf +++ b/modules/nf-core/gatk4/annotateintervals/main.nf @@ -1,11 +1,11 @@ process GATK4_ANNOTATEINTERVALS { - tag "$meta.id" + tag "${meta.id}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': - 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" input: tuple val(meta), path(intervals) @@ -19,7 +19,7 @@ process GATK4_ANNOTATEINTERVALS { output: tuple val(meta), path("*.tsv"), emit: annotated_intervals - path "versions.yml" , emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -28,15 +28,16 @@ process GATK4_ANNOTATEINTERVALS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def inputs = intervals.collect(){ "--intervals ${it}" }.join(" ") + def inputs = intervals.collect { "--intervals ${it}" }.join(" ") def mappability_track = mappable_regions ? "--mappability-track ${mappable_regions}" : "" def segmental_duplication_tracks = segmental_duplication_regions ? "--segmental-duplication-track ${segmental_duplication_regions}" : "" def avail_mem = 3072 if (!task.memory) { - log.info '[GATK AnnotateIntervals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() + log.info('[GATK AnnotateIntervals] Available memory not known - defaulting to 3GB. 
Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() } """ diff --git a/modules/nf-core/gatk4/annotateintervals/meta.yml b/modules/nf-core/gatk4/annotateintervals/meta.yml index ee644c9..7c6de2f 100644 --- a/modules/nf-core/gatk4/annotateintervals/meta.yml +++ b/modules/nf-core/gatk4/annotateintervals/meta.yml @@ -28,6 +28,7 @@ input: type: file description: One or more interval files to annotate pattern: "*.{interval_list,list,bed}" + ontologies: [] - - meta2: type: map description: | @@ -37,6 +38,7 @@ input: type: file description: The reference FASTA file pattern: "*.{fasta,fa}" + ontologies: [] - - meta3: type: map description: | @@ -46,6 +48,7 @@ input: type: file description: The index of the reference FASTA file pattern: "*.fai" + ontologies: [] - - meta4: type: map description: | @@ -55,6 +58,7 @@ input: type: file description: The sequence dictionary reference FASTA file pattern: "*.dict" + ontologies: [] - - meta5: type: map description: | @@ -66,6 +70,7 @@ input: Optional - Umap single-read mappability track The track should correspond to the appropriate read length and overlapping intervals must be merged pattern: "*.bed(.gz)?" + ontologies: [] - - meta6: type: map description: | @@ -76,6 +81,7 @@ input: description: Optional - The index of the gzipped umap single-read mappability track pattern: "*.bed.gz.tbi" + ontologies: [] - - meta7: type: map description: | @@ -85,6 +91,7 @@ input: type: file description: Optional - Segmental-duplication track pattern: "*.bed(.gz)?" 
+ ontologies: [] - - meta8: type: map description: | @@ -94,9 +101,10 @@ input: type: file description: Optional - The index of the gzipped segmental-duplication track pattern: "*.bed.gz.tbi" + ontologies: [] output: - - annotated_intervals: - - meta: + annotated_intervals: + - - meta: type: map description: | Groovy Map containing sample information @@ -106,16 +114,15 @@ output: description: The output TSV file with a SAM-style header containing the annotated intervals pattern: "*.tsv" - - s: - type: file - description: The output TSV file with a SAM-style header containing the annotated - intervals - pattern: "*.tsv" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@nvnieuwk" maintainers: diff --git a/modules/nf-core/gatk4/bedtointervallist/environment.yml b/modules/nf-core/gatk4/bedtointervallist/environment.yml index b562b72..67e0eb8 100644 --- a/modules/nf-core/gatk4/bedtointervallist/environment.yml +++ b/modules/nf-core/gatk4/bedtointervallist/environment.yml @@ -5,5 +5,6 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/gatk4 - - bioconda::gatk4=4.6.1.0 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/bedtointervallist/main.nf b/modules/nf-core/gatk4/bedtointervallist/main.nf index 89960e0..339e11c 100644 --- a/modules/nf-core/gatk4/bedtointervallist/main.nf +++ b/modules/nf-core/gatk4/bedtointervallist/main.nf @@ -1,11 +1,11 @@ process GATK4_BEDTOINTERVALLIST { - tag "$meta.id" + tag "${meta.id}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine 
== 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': - 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" input: tuple val(meta), path(bed) @@ -13,7 +13,7 @@ process GATK4_BEDTOINTERVALLIST { output: tuple val(meta), path('*.interval_list'), emit: interval_list - path "versions.yml" , emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -24,18 +24,19 @@ process GATK4_BEDTOINTERVALLIST { def avail_mem = 3072 if (!task.memory) { - log.info '[GATK BedToIntervalList] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() + log.info('[GATK BedToIntervalList] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() } """ gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ BedToIntervalList \\ - --INPUT $bed \\ + --INPUT ${bed} \\ --OUTPUT ${prefix}.interval_list \\ - --SEQUENCE_DICTIONARY $dict \\ + --SEQUENCE_DICTIONARY ${dict} \\ --TMP_DIR . 
\\ - $args + ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gatk4/bedtointervallist/meta.yml b/modules/nf-core/gatk4/bedtointervallist/meta.yml index 25348e1..b186132 100644 --- a/modules/nf-core/gatk4/bedtointervallist/meta.yml +++ b/modules/nf-core/gatk4/bedtointervallist/meta.yml @@ -26,6 +26,7 @@ input: type: file description: Input bed file pattern: "*.bed" + ontologies: [] - - meta2: type: map description: | @@ -35,25 +36,26 @@ input: type: file description: Sequence dictionary pattern: "*.dict" + ontologies: [] output: - - interval_list: - - meta: + interval_list: + - - meta: type: file description: gatk interval list file pattern: "*.interval_list" + ontologies: [] - "*.interval_list": type: file description: gatk interval list file pattern: "*.interval_list" - - _list: - type: file - description: gatk interval list file - pattern: "*.interval_list" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@kevinmenden" - "@ramprasadn" diff --git a/modules/nf-core/gatk4/collectreadcounts/environment.yml b/modules/nf-core/gatk4/collectreadcounts/environment.yml index b562b72..67e0eb8 100644 --- a/modules/nf-core/gatk4/collectreadcounts/environment.yml +++ b/modules/nf-core/gatk4/collectreadcounts/environment.yml @@ -5,5 +5,6 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/gatk4 - - bioconda::gatk4=4.6.1.0 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/collectreadcounts/main.nf b/modules/nf-core/gatk4/collectreadcounts/main.nf index c742a16..90083d8 100644 --- a/modules/nf-core/gatk4/collectreadcounts/main.nf 
+++ b/modules/nf-core/gatk4/collectreadcounts/main.nf @@ -1,11 +1,11 @@ process GATK4_COLLECTREADCOUNTS { - tag "$meta.id" + tag "${meta.id}" label 'process_low' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': - 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" input: tuple val(meta), path(input), path(input_index), path(intervals) @@ -14,9 +14,9 @@ process GATK4_COLLECTREADCOUNTS { tuple val(meta4), path(dict) output: - tuple val(meta), path("*.hdf5"), optional: true, emit: hdf5 - tuple val(meta), path("*.tsv") , optional: true, emit: tsv - path "versions.yml" , emit: versions + tuple val(meta), path("*.hdf5"), emit: hdf5, optional: true + tuple val(meta), path("*.tsv"), emit: tsv, optional: true + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -25,26 +25,29 @@ process GATK4_COLLECTREADCOUNTS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def reference = fasta ? "--reference $fasta" : "" - def extension = args.contains("--format HDF5") ? "hdf5" : - args.contains("--format TSV") ? "tsv" : - "hdf5" + def reference = fasta ? "--reference ${fasta}" : "" + def extension = args.contains("--format HDF5") + ? "hdf5" + : args.contains("--format TSV") + ? "tsv" + : "hdf5" def avail_mem = 3072 if (!task.memory) { - log.info '[GATK COLLECTREADCOUNTS] Available memory not known - defaulting to 3GB. 
Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() + log.info('[GATK COLLECTREADCOUNTS] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() } """ gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ CollectReadCounts \\ - --input $input \\ - --intervals $intervals \\ - --output ${prefix}.$extension \\ - $reference \\ + --input ${input} \\ + --intervals ${intervals} \\ + --output ${prefix}.${extension} \\ + ${reference} \\ --tmp-dir . \\ - $args + ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -55,9 +58,11 @@ process GATK4_COLLECTREADCOUNTS { stub: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def extension = args.contains("--format HDF5") ? "hdf5" : - args.contains("--format TSV") ? "tsv" : - "hdf5" + def extension = args.contains("--format HDF5") + ? "hdf5" + : args.contains("--format TSV") + ? 
"tsv" + : "hdf5" """ touch ${prefix}.${extension} diff --git a/modules/nf-core/gatk4/collectreadcounts/meta.yml b/modules/nf-core/gatk4/collectreadcounts/meta.yml index cf8c773..7370d01 100644 --- a/modules/nf-core/gatk4/collectreadcounts/meta.yml +++ b/modules/nf-core/gatk4/collectreadcounts/meta.yml @@ -29,14 +29,17 @@ input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" + ontologies: [] - input_index: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" + ontologies: [] - intervals: type: file description: A file containing the specified intervals pattern: "*.{bed,intervals}" + ontologies: [] - - meta2: type: map description: | @@ -46,6 +49,7 @@ input: type: file description: Optional - Reference FASTA pattern: "*.{fasta,fa}" + ontologies: [] - - meta3: type: map description: | @@ -55,6 +59,7 @@ input: type: file description: Optional - Index of the reference FASTA file pattern: "*.fai" + ontologies: [] - - meta4: type: map description: | @@ -64,9 +69,10 @@ input: type: file description: Optional - Sequence dictionary of the reference FASTA file pattern: "*.dict" + ontologies: [] output: - - hdf5: - - meta: + hdf5: + - - meta: type: map description: | Groovy Map containing sample information @@ -75,8 +81,9 @@ output: type: file description: The read counts in hdf5 format pattern: "*.hdf5" - - tsv: - - meta: + ontologies: [] + tsv: + - - meta: type: map description: | Groovy Map containing sample information @@ -85,11 +92,15 @@ output: type: file description: The read counts in TSV format pattern: "*.tsv" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@nvnieuwk" maintainers: diff --git 
a/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml b/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml index b562b72..67e0eb8 100644 --- a/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml @@ -5,5 +5,6 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/gatk4 - - bioconda::gatk4=4.6.1.0 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf b/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf index b754b05..48c927b 100644 --- a/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf @@ -1,32 +1,33 @@ process GATK4_CREATEREADCOUNTPANELOFNORMALS { - tag "$meta.id" + tag "${meta.id}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': - 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" input: tuple val(meta), path(counts) output: tuple val(meta), path("*.hdf5"), emit: pon - path "versions.yml" , emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def input_list = counts.collect(){"--input $it"}.join(" ") + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_list = counts.collect { "--input ${it}" }.join(" ") def avail_mem = 3072 if (!task.memory) { - log.info '[GATK CreateReadCountPanelOfNormals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() + log.info('[GATK CreateReadCountPanelOfNormals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() } """ gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml b/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml index cbbed8c..66fe086 100644 --- a/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml @@ -27,9 +27,11 @@ input: type: file description: Read counts in hdf5 or tsv format. pattern: "*.{hdf5,tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV output: - - pon: - - meta: + pon: + - - meta: type: map description: | Groovy Map containing sample information @@ -38,11 +40,14 @@ output: type: file description: Panel-of-normals file. 
pattern: "*.{hdf5}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@ramprasadn" maintainers: diff --git a/modules/nf-core/gatk4/createsequencedictionary/environment.yml b/modules/nf-core/gatk4/createsequencedictionary/environment.yml index b562b72..67e0eb8 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/environment.yml +++ b/modules/nf-core/gatk4/createsequencedictionary/environment.yml @@ -5,5 +5,6 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/gatk4 - - bioconda::gatk4=4.6.1.0 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/createsequencedictionary/main.nf b/modules/nf-core/gatk4/createsequencedictionary/main.nf index 998622a..a807400 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/main.nf +++ b/modules/nf-core/gatk4/createsequencedictionary/main.nf @@ -1,18 +1,18 @@ process GATK4_CREATESEQUENCEDICTIONARY { - tag "$fasta" + tag "${fasta}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': - 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" input: tuple val(meta), path(fasta) output: - tuple val(meta), path('*.dict') , emit: dict - path "versions.yml" , emit: versions + tuple val(meta), path('*.dict'), emit: dict + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -22,17 +22,18 @@ process GATK4_CREATESEQUENCEDICTIONARY { def avail_mem = 6144 if (!task.memory) { - log.info '[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() + log.info('[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() } """ gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ CreateSequenceDictionary \\ - --REFERENCE $fasta \\ - --URI $fasta \\ + --REFERENCE ${fasta} \\ + --URI ${fasta} \\ --TMP_DIR . 
\\ - $args + ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/gatk4/createsequencedictionary/meta.yml index 7b5156b..72dced2 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/meta.yml +++ b/modules/nf-core/gatk4/createsequencedictionary/meta.yml @@ -26,21 +26,26 @@ input: type: file description: Input fasta file pattern: "*.{fasta,fa}" + ontologies: [] output: - - dict: - - meta: + dict: + - - meta: type: file description: gatk dictionary file pattern: "*.{dict}" + ontologies: [] - "*.dict": type: file description: gatk dictionary file pattern: "*.{dict}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@maxulysse" - "@ramprasadn" diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/environment.yml b/modules/nf-core/gatk4/determinegermlinecontigploidy/environment.yml index b562b72..67e0eb8 100644 --- a/modules/nf-core/gatk4/determinegermlinecontigploidy/environment.yml +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/environment.yml @@ -5,5 +5,6 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/gatk4 - - bioconda::gatk4=4.6.1.0 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf b/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf index 6edccf6..4db4969 100644 --- a/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf @@ -1,40 +1,40 @@ - process GATK4_DETERMINEGERMLINECONTIGPLOIDY { - tag "$meta.id" + tag 
"${meta.id}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': - 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" input: tuple val(meta), path(counts), path(bed), path(exclude_beds) tuple val(meta2), path(ploidy_model) - path(contig_ploidy_table) + path contig_ploidy_table output: tuple val(meta), path("${prefix}-calls"), emit: calls tuple val(meta), path("${prefix}-model"), emit: model, optional: true - path "versions.yml" , emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def intervals = bed ? "--intervals ${bed}" : "" - def exclude = exclude_beds ? exclude_beds.collect(){"--exclude-intervals $it"}.join(" ") : "" + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def intervals = bed ? "--intervals ${bed}" : "" + def exclude = exclude_beds ? exclude_beds.collect { "--exclude-intervals ${it}" }.join(" ") : "" def contig_ploidy = contig_ploidy_table ? "--contig-ploidy-priors ${contig_ploidy_table}" : "" - def model = ploidy_model ? "--model ${ploidy_model}" : "" - def input_list = counts.collect(){"--input $it"}.join(" ") + def model = ploidy_model ? 
"--model ${ploidy_model}" : "" + def input_list = counts.collect { "--input ${it}" }.join(" ") def avail_mem = 3072 if (!task.memory) { - log.info '[GATK DetermineGermlineContigPloidy] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() + log.info('[GATK DetermineGermlineContigPloidy] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() } """ export THEANO_FLAGS="base_compiledir=\$PWD" diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml b/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml index 828628b..45e7176 100644 --- a/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml @@ -27,16 +27,20 @@ input: type: file description: One or more count TSV files created with gatk/collectreadcounts pattern: "*.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV - bed: type: file description: Optional - A bed file containing the intervals to include in the process pattern: "*.bed" + ontologies: [] - exclude_beds: type: file description: Optional - One or more bed files containing intervals to exclude from the process pattern: "*.bed" + ontologies: [] - - meta2: type: map description: | @@ -48,13 +52,15 @@ input: Optional - A folder containing the ploidy model. When a model is supplied to tool will run in CASE mode. 
pattern: '*-model/' - - - contig_ploidy_table: - type: file - description: The contig ploidy priors table - pattern: "*.tsv" + - contig_ploidy_table: + type: file + description: The contig ploidy priors table + pattern: "*.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV output: - - calls: - - meta: + calls: + - - meta: type: map description: | Groovy Map containing sample information @@ -63,8 +69,8 @@ output: type: directory description: A folder containing the calls from the input files pattern: "*-calls/" - - model: - - meta: + model: + - - meta: type: map description: | Groovy Map containing sample information @@ -75,11 +81,13 @@ output: A folder containing the model from the input files. This will only be created in COHORT mode (when no model is supplied to the process). pattern: "*-model/" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@nvnieuwk" maintainers: diff --git a/modules/nf-core/gatk4/filterintervals/environment.yml b/modules/nf-core/gatk4/filterintervals/environment.yml index b562b72..67e0eb8 100644 --- a/modules/nf-core/gatk4/filterintervals/environment.yml +++ b/modules/nf-core/gatk4/filterintervals/environment.yml @@ -5,5 +5,6 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/gatk4 - - bioconda::gatk4=4.6.1.0 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/filterintervals/main.nf b/modules/nf-core/gatk4/filterintervals/main.nf index ada752d..7733bd8 100644 --- a/modules/nf-core/gatk4/filterintervals/main.nf +++ b/modules/nf-core/gatk4/filterintervals/main.nf @@ -1,11 +1,11 @@ process GATK4_FILTERINTERVALS { - tag "$meta.id" + 
tag "${meta.id}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': - 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" input: tuple val(meta), path(intervals) @@ -14,31 +14,32 @@ process GATK4_FILTERINTERVALS { output: tuple val(meta), path("*.interval_list"), emit: interval_list - path "versions.yml" , emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def annotated_command = annotated_intervals ? "--annotated-intervals $annotated_intervals" : "" - def read_counts_command = read_counts ? read_counts.collect{"--input $it"}.join(" ") : "" + def annotated_command = annotated_intervals ? "--annotated-intervals ${annotated_intervals}" : "" + def read_counts_command = read_counts ? read_counts.collect { "--input ${it}" }.join(" ") : "" def avail_mem = 3072 if (!task.memory) { - log.info '[GATK FilterIntervals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() + log.info('[GATK FilterIntervals] Available memory not known - defaulting to 3GB. 
Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() } """ gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ FilterIntervals \\ - $annotated_command \\ - $read_counts_command \\ - --intervals $intervals \\ + ${annotated_command} \\ + ${read_counts_command} \\ + --intervals ${intervals} \\ --output ${prefix}.interval_list \\ - $args + ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gatk4/filterintervals/meta.yml b/modules/nf-core/gatk4/filterintervals/meta.yml index 87376ed..b897744 100644 --- a/modules/nf-core/gatk4/filterintervals/meta.yml +++ b/modules/nf-core/gatk4/filterintervals/meta.yml @@ -25,6 +25,7 @@ input: type: file description: Processed interval list file (processed_intervals.interval_list) pattern: "*.interval_list" + ontologies: [] - - meta2: type: map description: | @@ -34,6 +35,8 @@ input: type: file description: Read counts input file pattern: "*.{tsv, hdf5}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV - - meta3: type: map description: | @@ -43,9 +46,11 @@ input: type: file description: Annotated intervals TSV file (annotated_intervals.tsv). 
pattern: "*.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV output: - - interval_list: - - meta: + interval_list: + - - meta: type: map description: | Groovy Map containing sample information @@ -54,15 +59,14 @@ output: type: file description: Filtered interval list file pattern: "*.interval_list" - - _list: - type: file - description: Filtered interval list file - pattern: "*.interval_list" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@ryanjameskennedy" - "@ViktorHy" diff --git a/modules/nf-core/gatk4/germlinecnvcaller/environment.yml b/modules/nf-core/gatk4/germlinecnvcaller/environment.yml index b562b72..67e0eb8 100644 --- a/modules/nf-core/gatk4/germlinecnvcaller/environment.yml +++ b/modules/nf-core/gatk4/germlinecnvcaller/environment.yml @@ -5,5 +5,6 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/gatk4 - - bioconda::gatk4=4.6.1.0 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/germlinecnvcaller/main.nf b/modules/nf-core/gatk4/germlinecnvcaller/main.nf index e8afb1f..6da848d 100644 --- a/modules/nf-core/gatk4/germlinecnvcaller/main.nf +++ b/modules/nf-core/gatk4/germlinecnvcaller/main.nf @@ -1,11 +1,11 @@ process GATK4_GERMLINECNVCALLER { - tag "$meta.id" + tag "${meta.id}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': - 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" input: tuple val(meta), path(tsv), path(intervals), path(ploidy), path(model) @@ -13,26 +13,27 @@ process GATK4_GERMLINECNVCALLER { output: tuple val(meta), path("*-cnv-model/*-calls"), emit: cohortcalls, optional: true tuple val(meta), path("*-cnv-model/*-model"), emit: cohortmodel, optional: true - tuple val(meta), path("*-cnv-calls/*-calls"), emit: casecalls , optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("*-cnv-calls/*-calls"), emit: casecalls, optional: true + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def intervals_command = intervals ? "--intervals ${intervals}" : "" - def ploidy_command = ploidy ? "--contig-ploidy-calls ${ploidy}" : "" - def model_command = model ? "--model ${model}" : "" - def input_list = tsv.collect{"--input $it"}.join(' ') - def output_command = model ? "--output ${prefix}-cnv-calls" : "--output ${prefix}-cnv-model" + def intervals_command = intervals ? "--intervals ${intervals}" : "" + def ploidy_command = ploidy ? "--contig-ploidy-calls ${ploidy}" : "" + def model_command = model ? "--model ${model}" : "" + def input_list = tsv.collect { "--input ${it}" }.join(' ') + def output_command = model ? 
"--output ${prefix}-cnv-calls" : "--output ${prefix}-cnv-model" def avail_mem = 3072 if (!task.memory) { - log.info '[GATK GermlineCNVCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() + log.info('[GATK GermlineCNVCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() } """ export THEANO_FLAGS="base_compiledir=\$PWD" @@ -42,13 +43,13 @@ process GATK4_GERMLINECNVCALLER { gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ GermlineCNVCaller \\ - $input_list \\ - $ploidy_command \\ - $output_command \\ - --output-prefix $prefix \\ - $args \\ - $intervals_command \\ - $model_command + ${input_list} \\ + ${ploidy_command} \\ + ${output_command} \\ + --output-prefix ${prefix} \\ + ${args} \\ + ${intervals_command} \\ + ${model_command} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gatk4/germlinecnvcaller/meta.yml b/modules/nf-core/gatk4/germlinecnvcaller/meta.yml index a185d9d..4d8bb1d 100644 --- a/modules/nf-core/gatk4/germlinecnvcaller/meta.yml +++ b/modules/nf-core/gatk4/germlinecnvcaller/meta.yml @@ -26,11 +26,14 @@ input: type: file description: One or more count TSV files created with gatk/collectreadcounts pattern: "*.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV - intervals: type: file description: Optional - A bed file containing the intervals to include in the process pattern: "*.bed" + ontologies: [] - ploidy: type: directory description: Directory containing ploidy calls produced by determinegermlinecontigploidy @@ -42,8 +45,8 @@ input: cohort mode pattern: "*-cnv-model/*-model" output: - - cohortcalls: - - meta: + cohortcalls: + - - meta: type: map description: | Groovy Map containing sample information @@ -53,8 +56,8 @@ output: description: Tar gzipped 
directory containing calls produced by germlinecnvcaller case mode pattern: "*-cnv-model/*-calls" - - cohortmodel: - - meta: + cohortmodel: + - - meta: type: map description: | Groovy Map containing sample information @@ -64,8 +67,8 @@ output: description: Optional - Tar gzipped directory containing the model produced by germlinecnvcaller cohort mode pattern: "*-cnv-model/*-model" - - casecalls: - - meta: + casecalls: + - - meta: type: map description: | Groovy Map containing sample information @@ -75,11 +78,13 @@ output: description: Tar gzipped directory containing calls produced by germlinecnvcaller case mode pattern: "*-cnv-calls/*-calls" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@ryanjameskennedy" - "@ViktorHy" diff --git a/modules/nf-core/gatk4/indexfeaturefile/environment.yml b/modules/nf-core/gatk4/indexfeaturefile/environment.yml index b562b72..67e0eb8 100644 --- a/modules/nf-core/gatk4/indexfeaturefile/environment.yml +++ b/modules/nf-core/gatk4/indexfeaturefile/environment.yml @@ -5,5 +5,6 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/gatk4 - - bioconda::gatk4=4.6.1.0 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/indexfeaturefile/main.nf b/modules/nf-core/gatk4/indexfeaturefile/main.nf index 6993537..aa8fe72 100644 --- a/modules/nf-core/gatk4/indexfeaturefile/main.nf +++ b/modules/nf-core/gatk4/indexfeaturefile/main.nf @@ -1,18 +1,18 @@ process GATK4_INDEXFEATUREFILE { - tag "$meta.id" + tag "${meta.id}" label 'process_low' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': - 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" input: tuple val(meta), path(feature_file) output: tuple val(meta), path("*.{tbi,idx}"), emit: index - path "versions.yml" , emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -22,16 +22,17 @@ process GATK4_INDEXFEATUREFILE { def avail_mem = 3072 if (!task.memory) { - log.info '[GATK IndexFeatureFile] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() + log.info('[GATK IndexFeatureFile] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() } """ gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ IndexFeatureFile \\ - --input $feature_file \\ + --input ${feature_file} \\ --tmp-dir . 
\\ - $args + ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gatk4/indexfeaturefile/meta.yml b/modules/nf-core/gatk4/indexfeaturefile/meta.yml index cfc717d..ff747b5 100644 --- a/modules/nf-core/gatk4/indexfeaturefile/meta.yml +++ b/modules/nf-core/gatk4/indexfeaturefile/meta.yml @@ -24,9 +24,10 @@ input: type: file description: VCF/BED file pattern: "*.{vcf,vcf.gz,bed,bed.gz}" + ontologies: [] output: - - index: - - meta: + index: + - - meta: type: map description: | Groovy Map containing sample information @@ -35,11 +36,14 @@ output: type: file description: Index for VCF/BED file pattern: "*.{tbi,idx}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@santiagorevale" maintainers: diff --git a/modules/nf-core/gatk4/intervallisttools/environment.yml b/modules/nf-core/gatk4/intervallisttools/environment.yml index b562b72..67e0eb8 100644 --- a/modules/nf-core/gatk4/intervallisttools/environment.yml +++ b/modules/nf-core/gatk4/intervallisttools/environment.yml @@ -5,5 +5,6 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/gatk4 - - bioconda::gatk4=4.6.1.0 + - bioconda::gatk4=4.6.2.0 + # renovate: datasource=conda depName=bioconda/gcnvkernel - bioconda::gcnvkernel=0.9 diff --git a/modules/nf-core/gatk4/intervallisttools/main.nf b/modules/nf-core/gatk4/intervallisttools/main.nf index bf20d4c..911eb45 100644 --- a/modules/nf-core/gatk4/intervallisttools/main.nf +++ b/modules/nf-core/gatk4/intervallisttools/main.nf @@ -1,18 +1,18 @@ process GATK4_INTERVALLISTTOOLS { - tag "$meta.id" + tag "${meta.id}" label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' 
&& !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': - 'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data' + : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}" input: tuple val(meta), path(intervals) output: tuple val(meta), path("*_split/*/*.interval_list"), emit: interval_list - path "versions.yml" , emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -23,9 +23,10 @@ process GATK4_INTERVALLISTTOOLS { def avail_mem = 3072 if (!task.memory) { - log.info '[GATK IntervalListTools] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() + log.info('[GATK IntervalListTools] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() } """ @@ -33,10 +34,10 @@ process GATK4_INTERVALLISTTOOLS { gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ IntervalListTools \\ - --INPUT $intervals \\ + --INPUT ${intervals} \\ --OUTPUT ${prefix}_split \\ --TMP_DIR . 
\\ - $args + ${args} python3 < versions.yml "${task.process}": diff --git a/modules/nf-core/gatk4/preprocessintervals/meta.yml b/modules/nf-core/gatk4/preprocessintervals/meta.yml index 0e13674..af3e5bc 100644 --- a/modules/nf-core/gatk4/preprocessintervals/meta.yml +++ b/modules/nf-core/gatk4/preprocessintervals/meta.yml @@ -26,6 +26,7 @@ input: type: file description: The reference fasta file pattern: "*.fasta" + ontologies: [] - - meta2: type: map description: | @@ -35,6 +36,7 @@ input: type: file description: Index of reference fasta file pattern: "*.fasta.fai" + ontologies: [] - - meta3: type: map description: | @@ -44,6 +46,7 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" + ontologies: [] - - meta4: type: map description: | @@ -54,6 +57,7 @@ input: description: Interval file (bed or interval_list) with the genomic regions to be included from the analysis (optional) pattern: "*.{bed,interval_list}" + ontologies: [] - - meta5: type: map description: | @@ -64,9 +68,10 @@ input: description: Interval file (bed or interval_list) with the genomic regions to be excluded from the analysis (optional) pattern: "*.{bed,interval_list}" + ontologies: [] output: - - interval_list: - - meta: + interval_list: + - - meta: type: map description: | Groovy Map containing reference information @@ -75,15 +80,14 @@ output: type: file description: Processed interval list file pattern: "*.{bed,interval_list}" - - _list: - type: file - description: Processed interval list file - pattern: "*.{bed,interval_list}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@ryanjameskennedy" - "@ViktorHy" diff --git a/modules/nf-core/gawk/meta.yml b/modules/nf-core/gawk/meta.yml index 
34c50b1..732e18a 100644 --- a/modules/nf-core/gawk/meta.yml +++ b/modules/nf-core/gawk/meta.yml @@ -26,37 +26,42 @@ input: - input: type: file description: The input file - Specify the logic that needs to be executed on - this file on the `ext.args2` or in the program file. - If the files have a `.gz` extension, they will be unzipped using `zcat`. + this file on the `ext.args2` or in the program file. If the files have a `.gz` + extension, they will be unzipped using `zcat`. pattern: "*" - - - program_file: - type: file - description: Optional file containing logic for awk to execute. If you don't - wish to use a file, you can use `ext.args2` to specify the logic. - pattern: "*" - - - disable_redirect_output: - type: boolean - description: Disable the redirection of awk output to a given file. This is - useful if you want to use awk's built-in redirect to write files instead - of the shell's redirect. + ontologies: [] + - program_file: + type: file + description: Optional file containing logic for awk to execute. If you don't wish + to use a file, you can use `ext.args2` to specify the logic. + pattern: "*" + ontologies: [] + - disable_redirect_output: + type: boolean + description: Disable the redirection of awk output to a given file. This is useful + if you want to use awk's built-in redirect to write files instead of the shell's + redirect. output: - - output: - - meta: + output: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - "*.${suffix}": type: file - description: The output file - if using shell redirection, specify the name of this - file using `ext.prefix` and the extension using `ext.suffix`. Otherwise, ensure - the awk program produces files with the extension in `ext.suffix`. + description: The output file - if using shell redirection, specify the name + of this file using `ext.prefix` and the extension using `ext.suffix`. 
Otherwise, + ensure the awk program produces files with the extension in `ext.suffix`. pattern: "*" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@nvnieuwk" maintainers: diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml index 62054fc..89e12a6 100644 --- a/modules/nf-core/samtools/faidx/environment.yml +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index 6de0095..ed2d70a 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_FAIDX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index 256a330..b7a2e0c 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -26,6 +26,7 @@ input: type: file description: FASTA file pattern: "*.{fa,fasta}" + ontologies: [] - - meta2: type: map description: | @@ -35,13 +36,14 @@ input: type: file description: FASTA index file pattern: "*.{fai}" - - - get_sizes: - type: boolean - description: use cut to get the sizes of the index (true) or not (false) + ontologies: [] + - get_sizes: + type: boolean + description: use cut to get the sizes of the index (true) or not (false) output: - - fa: - - meta: + fa: + - - meta: type: map description: | Groovy Map containing sample information @@ -50,28 +52,31 @@ output: type: file description: FASTA file pattern: "*.{fa}" - - fai: - - meta: + ontologies: [] + sizes: + - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.fai": + - "*.sizes": type: file - description: FASTA index file - pattern: "*.{fai}" - - sizes: - - meta: + description: File containing chromosome lengths + pattern: "*.{sizes}" + ontologies: [] + fai: + - - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - "*.sizes": + - "*.fai": type: file - description: File containing chromosome lengths - pattern: "*.{sizes}" - - gzi: - - meta: + description: FASTA index file + pattern: "*.{fai}" + ontologies: [] + gzi: + - - meta: type: map description: | Groovy Map containing sample information @@ -80,11 +85,14 @@ output: type: file description: Optional gzip index file for compressed inputs pattern: "*.gzi" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml index 62054fc..89e12a6 100644 --- a/modules/nf-core/samtools/index/environment.yml +++ b/modules/nf-core/samtools/index/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 3117561..a77ad82 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_INDEX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(input) @@ -24,7 +24,7 @@ process SAMTOOLS_INDEX { """ samtools \\ index \\ - -@ ${task.cpus-1} \\ + -@ ${task.cpus} \\ $args \\ $input diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml index db8df0d..1bed6bc 100644 --- a/modules/nf-core/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -25,9 +25,10 @@ input: - input: type: file description: input file + ontologies: [] output: - - bai: - - meta: + bai: + - - meta: type: map description: | Groovy Map containing sample information @@ -36,8 +37,9 @@ output: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" - - csi: - - meta: + ontologies: [] + csi: + - - meta: type: map description: | Groovy Map containing sample information @@ -46,8 +48,9 @@ output: type: file description: CSI index file pattern: "*.{csi}" - - crai: - - meta: + ontologies: [] + crai: + - - meta: type: map description: | Groovy Map containing sample information @@ -56,11 +59,14 @@ output: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml index 8cae571..89e12a6 100644 --- a/modules/nf-core/samtools/view/environment.yml +++ b/modules/nf-core/samtools/view/environment.yml @@ -5,5 +5,6 @@ channels: - bioconda 
dependencies: # renovate: datasource=conda depName=bioconda/htslib - - bioconda::htslib=1.21 - - bioconda::samtools=1.21 + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf index f43a4c6..02d9b0f 100644 --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_VIEW { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(input), path(index) @@ -50,6 +50,7 @@ process SAMTOOLS_VIEW { } } """ + # Note: --threads value represents *additional* CPUs to allocate (total CPUs = 1 + --threads). 
samtools \\ view \\ --threads ${task.cpus-1} \\ diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml index 28c268a..3ebbdb8 100644 --- a/modules/nf-core/samtools/view/meta.yml +++ b/modules/nf-core/samtools/view/meta.yml @@ -26,10 +26,12 @@ input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" + ontologies: [] - index: type: file description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) pattern: "*.{.bai,.csi,.crai}" + ontologies: [] - - meta2: type: map description: | @@ -39,17 +41,19 @@ input: type: file description: Reference file the CRAM was created with (optional) pattern: "*.{fasta,fa}" - - - qname: - type: file - description: Optional file with read names to output only select alignments - pattern: "*.{txt,list}" - - - index_format: - type: string - description: Index format, used together with ext.args = '--write-index' - pattern: "bai|csi|crai" + ontologies: [] + - qname: + type: file + description: Optional file with read names to output only select alignments + pattern: "*.{txt,list}" + ontologies: [] + - index_format: + type: string + description: Index format, used together with ext.args = '--write-index' + pattern: "bai|csi|crai" output: - - bam: - - meta: + bam: + - - meta: type: map description: | Groovy Map containing sample information @@ -58,8 +62,9 @@ output: type: file description: optional filtered/converted BAM file pattern: "*.{bam}" - - cram: - - meta: + ontologies: [] + cram: + - - meta: type: map description: | Groovy Map containing sample information @@ -68,8 +73,9 @@ output: type: file description: optional filtered/converted CRAM file pattern: "*.{cram}" - - sam: - - meta: + ontologies: [] + sam: + - - meta: type: map description: | Groovy Map containing sample information @@ -78,8 +84,9 @@ output: type: file description: optional filtered/converted SAM file pattern: "*.{sam}" - - bai: - - meta: + ontologies: [] + bai: + - - meta: type: map description: | Groovy Map 
containing sample information @@ -88,8 +95,9 @@ output: type: file description: optional BAM file index pattern: "*.{bai}" - - csi: - - meta: + ontologies: [] + csi: + - - meta: type: map description: | Groovy Map containing sample information @@ -98,8 +106,9 @@ output: type: file description: optional tabix BAM file index pattern: "*.{csi}" - - crai: - - meta: + ontologies: [] + crai: + - - meta: type: map description: | Groovy Map containing sample information @@ -108,8 +117,9 @@ output: type: file description: optional CRAM file index pattern: "*.{crai}" - - unselected: - - meta: + ontologies: [] + unselected: + - - meta: type: map description: | Groovy Map containing sample information @@ -118,8 +128,9 @@ output: type: file description: optional file with unselected alignments pattern: "*.unselected.{bam,cram,sam}" - - unselected_index: - - meta: + ontologies: [] + unselected_index: + - - meta: type: map description: | Groovy Map containing sample information @@ -128,11 +139,14 @@ output: type: file description: index for the "unselected" file pattern: "*.unselected.{csi,crai}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@drpatelh" - "@joseespinosa" From fc7d60a552fcdfac8f4b7dd9f300358789b6f404 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 31 Oct 2025 14:51:21 +0100 Subject: [PATCH 225/234] update snapshot --- subworkflows/local/gens_pon/main.nf | 12 +-- .../local/germlinecnvcaller_cohort/main.nf | 24 ++--- subworkflows/local/prepare_genome/main.nf | 24 ++--- .../main.nf | 98 +++++++++---------- tests/default.nf.test.snap | 10 +- tests/gens_pon.nf.test.snap | 8 +- 6 files changed, 86 insertions(+), 90 deletions(-) diff --git a/subworkflows/local/gens_pon/main.nf 
b/subworkflows/local/gens_pon/main.nf index 82b2250..99fb8e7 100644 --- a/subworkflows/local/gens_pon/main.nf +++ b/subworkflows/local/gens_pon/main.nf @@ -4,15 +4,15 @@ include { SAMTOOLS_INDEX } from '../../../modules/nf-core/s workflow GENS_PON { take: - ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] - val_pon_name // string: [optional] name for panel of normals - ch_dict // channel: [optional] [ val(meta), path(dict) ] - ch_fai // channel: [optional] [ val(meta), path(fai) ] - ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] + val_pon_name // string: [optional] name for panel of normals + ch_dict // channel: [optional] [ val(meta), path(dict) ] + ch_fai // channel: [optional] [ val(meta), path(fai) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_interval_list // channel: [mandatory] [ val(meta), path(interval_list) ] main: - versions = Channel.empty() + versions = channel.empty() // Filter out files that lack indices, and generate them ch_input diff --git a/subworkflows/local/germlinecnvcaller_cohort/main.nf b/subworkflows/local/germlinecnvcaller_cohort/main.nf index 4f2f25a..e4ce777 100644 --- a/subworkflows/local/germlinecnvcaller_cohort/main.nf +++ b/subworkflows/local/germlinecnvcaller_cohort/main.nf @@ -13,21 +13,21 @@ include { SAMTOOLS_INDEX } from '. 
workflow GERMLINECNVCALLER_COHORT { take: - ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] - val_pon_name // string: [optional] name for panel of normals - ch_dict // channel: [optional] [ val(meta), path(dict) ] - ch_fai // channel: [optional] [ val(meta), path(fai) ] - ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_exclude_bed // channel: [optional] [ val(meta), path(bed) ] + ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] + val_pon_name // string: [optional] name for panel of normals + ch_dict // channel: [optional] [ val(meta), path(dict) ] + ch_fai // channel: [optional] [ val(meta), path(fai) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_exclude_bed // channel: [optional] [ val(meta), path(bed) ] ch_user_exclude_interval_list // channel: [optional] [ val(meta), path(intervals) ] - ch_mappable_regions // channel: [optional] [ val(meta), path(bed) ] - ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] - ch_segmental_duplications // channel: [optional] [ val(meta), path(bed) ] - ch_target_bed // channel: [optional] [ val(meta), path(bed) ] - ch_user_target_interval_list // channel: [optional] [ val(meta), path(intervals) ] + ch_mappable_regions // channel: [optional] [ val(meta), path(bed) ] + ch_ploidy_priors // channel: [mandatory] [ path(tsv) ] + ch_segmental_duplications // channel: [optional] [ val(meta), path(bed) ] + ch_target_bed // channel: [optional] [ val(meta), path(bed) ] + ch_user_target_interval_list // channel: [optional] [ val(meta), path(intervals) ] main: - versions = Channel.empty() + versions = channel.empty() // Prepare references GATK4_INDEXFEATUREFILE_MAPPABILITY(ch_mappable_regions) diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index a40da87..c52af73 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -6,19 +6,19 @@ include { 
SAMTOOLS_FAIDX } from '.. // Prepare references workflow PREPARE_GENOME { take: - fasta // channel: [mandatory] [ val(meta), path(fasta) ] - user_dict // channel: [optional] [ val(meta), path(dict) ] - user_fai // channel: [optional] [ val(meta), path(fai) ] + fasta // channel: [mandatory] [ val(meta), path(fasta) ] + user_dict // channel: [optional] [ val(meta), path(dict) ] + user_fai // channel: [optional] [ val(meta), path(fai) ] user_gens_interval_list // channel: [optional] [ val(meta), path(gens_interval_list) ] user_mutect2_target_bed // channel: [optional] [ val(meta), path(mutect2_target_bed) ] - tools // array: [mandatory] [ tools ] + tools // array: [mandatory] [ tools ] main: - dict = Channel.empty() - fai = Channel.empty() - gens_interval_list = Channel.empty() - mutect2_target_bed = Channel.empty() - versions = Channel.empty() + dict = channel.empty() + fai = channel.empty() + gens_interval_list = channel.empty() + mutect2_target_bed = channel.empty() + versions = channel.empty() // If a user_dict is provided, no fasta will be used to generate a dict // Otherwise, GATK4_CREATESEQUENCEDICTIONARY will be run to generate a dict @@ -70,9 +70,9 @@ workflow PREPARE_GENOME { versions = versions.mix(SAMTOOLS_FAIDX.out.versions) emit: - dict // channel: [ val(meta), path(dict) ] - fai // channel: [ val(meta), path(fai) ] + dict // channel: [ val(meta), path(dict) ] + fai // channel: [ val(meta), path(fai) ] gens_interval_list // channel: [ val(meta), path(gens_interval_list) ] mutect2_target_bed // channel: [ val(meta), path(mutect2_target_bed) ] - versions // channel: [ path(versions.yml)] + versions // channel: [ path(versions.yml)] } diff --git a/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf b/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf index 0c37210..47125b3 100644 --- a/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf @@ -8,15 
+8,15 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { samplesheetToList } from 'plugin/nf-schema' -include { paramsHelp } from 'plugin/nf-schema' -include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' -include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' -include { imNotification } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { samplesheetToList } from 'plugin/nf-schema' +include { paramsHelp } from 'plugin/nf-schema' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -25,30 +25,29 @@ include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipelin */ workflow PIPELINE_INITIALISATION { - take: - version // boolean: Display version and exit - validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not use coloured log outputs + version // boolean: Display version and exit + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs nextflow_cli_args // 
array: List of positional nextflow CLI args - outdir // string: The output directory where the results will be saved - input // string: Path to input samplesheet - help // boolean: Display help message and exit - help_full // boolean: Show the full help message - show_hidden // boolean: Show hidden parameters in the help message + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + help // boolean: Display help message and exit + help_full // boolean: Show the full help message + show_hidden // boolean: Show hidden parameters in the help message main: - ch_versions = Channel.empty() + ch_versions = channel.empty() // // Print version and exit if required and dump pipeline parameters to JSON file // - UTILS_NEXTFLOW_PIPELINE ( + UTILS_NEXTFLOW_PIPELINE( version, true, outdir, - workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1, ) // @@ -64,7 +63,7 @@ workflow PIPELINE_INITIALISATION { \033[0;35m nf-core/createpanelrefs ${workflow.manifest.version}\033[0m -\033[2m----------------------------------------------------\033[0m- """ - after_text = """${workflow.manifest.doi ? "\n* The pipeline\n" : ""}${workflow.manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${workflow.manifest.doi ? "\n" : ""} + after_text = """${workflow.manifest.doi ? "\n* The pipeline\n" : ""}${workflow.manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/', '')}" }.join("\n")}${workflow.manifest.doi ? 
"\n" : ""} * The nf-core framework https://doi.org/10.1038/s41587-020-0439-x @@ -73,7 +72,7 @@ workflow PIPELINE_INITIALISATION { """ command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " - UTILS_NFSCHEMA_PLUGIN ( + UTILS_NFSCHEMA_PLUGIN( workflow, validate_params, null, @@ -82,13 +81,13 @@ workflow PIPELINE_INITIALISATION { show_hidden, before_text, after_text, - command + command, ) // // Check config provided to the pipeline // - UTILS_NFCORE_PIPELINE ( + UTILS_NFCORE_PIPELINE( nextflow_cli_args ) @@ -101,8 +100,7 @@ workflow PIPELINE_INITIALISATION { // Create channel from input file provided through params.input // - ch_samplesheet = Channel - .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + ch_samplesheet = channel.fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) emit: samplesheet = ch_samplesheet @@ -116,15 +114,14 @@ workflow PIPELINE_INITIALISATION { */ workflow PIPELINE_COMPLETION { - take: - email // string: email address - email_on_fail // string: email address sent on pipeline failure + email // string: email address + email_on_fail // string: email address sent on pipeline failure plaintext_email // boolean: Send plain-text email instead of HTML - outdir // path: Path to output directory where results will be published + outdir // path: Path to output directory where results will be published monochrome_logs // boolean: Disable ANSI colour codes in log output - hook_url // string: hook URL for notifications - multiqc_report // string: Path to MultiQC report + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report main: summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") @@ -153,7 +150,7 @@ workflow PIPELINE_COMPLETION { } workflow.onError { - log.error "Pipeline failed. 
Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + log.error("Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting") } } @@ -174,8 +171,8 @@ def validateInputParameters() { // def getGenomeAttribute(attribute) { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] + if (params.genomes[params.genome].containsKey(attribute)) { + return params.genomes[params.genome][attribute] } } return null @@ -186,11 +183,7 @@ def getGenomeAttribute(attribute) { // def genomeExistsError() { if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + " Currently, the available genome keys are:\n" + " ${params.genomes.keySet().join(", ")}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" error(error_string) } } @@ -202,11 +195,11 @@ def toolCitationText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." 
- ].join(' ').trim() + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + ".", + ].join(' ').trim() return citation_text } @@ -216,9 +209,9 @@ def toolBibliographyText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ - "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", - "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " - ].join(' ').trim() + "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • ", + ].join(' ').trim() return reference_text } @@ -240,7 +233,10 @@ def methodsDescriptionText(mqc_methods_yaml) { temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " } meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) - } else meta["doi_text"] = "" + } + else { + meta["doi_text"] = "" + } meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " // Tool references @@ -254,7 +250,7 @@ def methodsDescriptionText(mqc_methods_yaml) { def methods_text = mqc_methods_yaml.text - def engine = new groovy.text.SimpleTemplateEngine() + def engine = new groovy.text.SimpleTemplateEngine() def description_html = engine.createTemplate(methods_text).make(meta) return description_html.toString() diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 3ee223d..6f1540f 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -3,7 +3,7 @@ "content": [ { "CNVKIT_BATCH": { - "cnvkit": "0.9.10" + "cnvkit": "0.9.12" } }, [ @@ -36,7 +36,7 @@ ], [ "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "panel.cnn:md5,1443acdb3bb430b0c144ec100ef8a514", + "panel.cnn:md5,bb11c7ec8b2a5679fcabc4dbfa83294a", "sample3.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", "sample3.targetcoverage.cnn:md5,814200aceed64f3e0c4a69dab64553c4", "sample4.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", @@ -45,14 +45,14 @@ "test.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,814200aceed64f3e0c4a69dab64553c4", "test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", "test2.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,ae3bfc49096f86e48c37bc9b997982fb", - "sample3.bam:md5,28d7b627bcc8a220253ce2a950b18b56", - "sample4.bam:md5,d221a5042fd4cdec4fcb91e89e8bb92e" + "sample3.bam:md5,945810b0063a00721d75ff8fbfec2e82", + "sample4.bam:md5,03417a2d44a2da7a2b7ae3be276c273e" ] ], "meta": { "nf-test": "0.9.3", "nextflow": "25.10.0" }, - "timestamp": "2025-10-31T12:24:51.669930944" + "timestamp": "2025-10-31T14:28:40.413006402" } } \ No newline at end of file diff --git a/tests/gens_pon.nf.test.snap b/tests/gens_pon.nf.test.snap index dc8a5d1..357d60d 100644 --- a/tests/gens_pon.nf.test.snap +++ b/tests/gens_pon.nf.test.snap @@ -3,13 +3,13 @@ "content": [ { "GATK4_COLLECTREADCOUNTS": { - "gatk4": "4.6.1.0" + "gatk4": "4.6.2.0" }, 
"GATK4_CREATEREADCOUNTPANELOFNORMALS": { - "gatk4": "4.6.1.0" + "gatk4": "4.6.2.0" }, "GATK4_PREPROCESSINTERVALS_GENS": { - "gatk4": "4.6.1.0" + "gatk4": "4.6.2.0" } }, [ @@ -46,6 +46,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.0" }, - "timestamp": "2025-10-31T12:25:38.351926986" + "timestamp": "2025-10-31T14:33:30.730355032" } } \ No newline at end of file From 818ee7440b4d41a78a352577ea344e188da41b12 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 31 Oct 2025 15:14:05 +0100 Subject: [PATCH 226/234] mutect2 fix --- CHANGELOG.md | 15 +++++++++++---- tests/mutect2.nf.test.snap | 16 ++++++++-------- workflows/createpanelrefs.nf | 2 +- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7bb898..ee5e536 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,14 +46,21 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#57](https://github.com/nf-core/createpanelrefs/pull/57) - Improve syntax in `assets/schema_input.json` file, from @nvnieuwk in [#46](https://github.com/nf-core/createpanelrefs/pull/46) - [#57](https://github.com/nf-core/createpanelrefs/pull/57) - Fix missing documentation for GATK Mutect2 and GENS -### `Dependencies` +### `Dependencies` - modules | Dependency | Old version | New version | | ---------- | ----------- | ----------- | -| cnvkit | | 0.9.10 | -| gatk4 | | 4.6.1.0 | +| cnvkit | | 0.9.12 | +| gatk4 | | 4.6.2.0 | | gawk | | 5.3.0 | +| htslib | | 1.22.1 | | multiqc | | 1.32 | -| samtools | | 1.21 | +| samtools | | 1.22.1 | + +### `Dependencies` - Nextflow plugins + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| nf-schema | | 2.5.1 | ### `Deprecated` diff --git a/tests/mutect2.nf.test.snap b/tests/mutect2.nf.test.snap index 0031712..e1c5de8 100644 --- a/tests/mutect2.nf.test.snap +++ b/tests/mutect2.nf.test.snap @@ -6,13 +6,13 @@ "gawk": "5.3.0" }, "GATK4_CREATESOMATICPANELOFNORMALS": { - "gatk4": "4.6.1.0" + "gatk4": 
"4.6.2.0" }, "GATK4_GENOMICSDBIMPORT": { - "gatk4": "4.6.1.0" + "gatk4": "4.6.2.0" }, "GATK4_MUTECT2": { - "gatk4": "4.6.1.0" + "gatk4": "4.6.2.0" } }, [ @@ -58,19 +58,19 @@ "nf-test": "0.9.3", "nextflow": "25.10.0" }, - "timestamp": "2025-10-31T12:32:20.086767028" + "timestamp": "2025-10-31T15:11:32.603760513" }, "-profile test --tools mutect2 --mutect2_pon_name test": { "content": [ { "GATK4_CREATESOMATICPANELOFNORMALS": { - "gatk4": "4.6.1.0" + "gatk4": "4.6.2.0" }, "GATK4_GENOMICSDBIMPORT": { - "gatk4": "4.6.1.0" + "gatk4": "4.6.2.0" }, "GATK4_MUTECT2": { - "gatk4": "4.6.1.0" + "gatk4": "4.6.2.0" } }, [ @@ -114,6 +114,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.0" }, - "timestamp": "2025-10-31T12:29:50.79446037" + "timestamp": "2025-10-31T15:08:35.629022911" } } \ No newline at end of file diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index a534e83..99a91da 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -101,7 +101,7 @@ workflow CREATEPANELREFS { BAM_CREATE_SOM_PON_GATK( mutect2_input, fasta, - fai, + fai.map { meta, fai_ -> [meta, fai_, []] }, dict, mutect2_pon_name, mutect2_target_bed.map { _meta, target -> [target] }, From 6c8fb2a66b5b7716ed4388992048c16bdd12c5bb Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 31 Oct 2025 16:02:58 +0100 Subject: [PATCH 227/234] fix snapshots --- conf/modules/germlinecnvcaller_cohort.config | 16 +++++++++++++ .../local/germlinecnvcaller_cohort/main.nf | 6 ++--- tests/germlinecnvcaller_cohort.nf.test.snap | 23 +++++++++---------- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/conf/modules/germlinecnvcaller_cohort.config b/conf/modules/germlinecnvcaller_cohort.config index 0037bca..132ce0a 100644 --- a/conf/modules/germlinecnvcaller_cohort.config +++ b/conf/modules/germlinecnvcaller_cohort.config @@ -17,6 +17,22 @@ process { ] } + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_INDEXFEATUREFILE_MAPPABILITY' { + ext.when = { 
!params.gcnv_mappable_regions.equals(null) } + } + + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_INDEXFEATUREFILE_SEGDUP' { + ext.when = { !params.gcnv_segmental_duplications.equals(null) } + } + + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_BEDTOINTERVALLIST_TARGETS' { + ext.when = { params.gcnv_analysis_type.equals("wes") && params.gcnv_target_interval_list.equals(null) && params.gcnv_target_bed } + } + + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_BEDTOINTERVALLIST_EXCLUDE' { + ext.when = { params.gcnv_analysis_type.equals("wes") && params.gcnv_exclude_interval_list.equals(null) && params.gcnv_exclude_bed } + } + withName: '.*GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS' { ext.args = { ["--imr OVERLAPPING_ONLY", "--padding ${params.gcnv_padding}", diff --git a/subworkflows/local/germlinecnvcaller_cohort/main.nf b/subworkflows/local/germlinecnvcaller_cohort/main.nf index e4ce777..4c63478 100644 --- a/subworkflows/local/germlinecnvcaller_cohort/main.nf +++ b/subworkflows/local/germlinecnvcaller_cohort/main.nf @@ -132,11 +132,11 @@ workflow GERMLINECNVCALLER_COHORT { GATK4_ANNOTATEINTERVALS.out.annotated_intervals, ) - GATK4_INTERVALLISTTOOLS(GATK4_FILTERINTERVALS.out.interval_list).interval_list.map { meta, it -> it }.flatten().set { ch_intervallist_out } + GATK4_INTERVALLISTTOOLS(GATK4_FILTERINTERVALS.out.interval_list).interval_list.map { _meta, it -> it }.flatten().set { ch_intervallist_out } ch_readcounts_out .combine(GATK4_FILTERINTERVALS.out.interval_list) - .map { meta, counts, meta2, il -> [meta, counts, il, []] } + .map { meta, counts, _meta2, il -> [meta, counts, il, []] } .set { ch_contigploidy_in } GATK4_DETERMINEGERMLINECONTIGPLOIDY( @@ -148,7 +148,7 @@ workflow GERMLINECNVCALLER_COHORT { ch_readcounts_out .combine(ch_intervallist_out) .combine(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.calls) - .map { meta, counts, il, meta2, calls -> [meta + [id: il.baseName], counts, il, calls, []] } + .map { meta, counts, il, _meta2, calls -> [meta + [id: 
il.baseName], counts, il, calls, []] } .set { ch_cnvcaller_in } GATK4_GERMLINECNVCALLER(ch_cnvcaller_in) diff --git a/tests/germlinecnvcaller_cohort.nf.test.snap b/tests/germlinecnvcaller_cohort.nf.test.snap index b55bd4d..35037b8 100644 --- a/tests/germlinecnvcaller_cohort.nf.test.snap +++ b/tests/germlinecnvcaller_cohort.nf.test.snap @@ -1,28 +1,27 @@ { "-profile test --tools germlinecnvcaller --input tests/csv/1.0.0/bam_sorted.csv --gcnv_model_name cohort --gcnv_ploidy_priors --gcnv_scatter_content 2": { "content": [ - 10, { "GATK4_ANNOTATEINTERVALS": { - "gatk4": "4.6.1.0" + "gatk4": "4.6.2.0" }, "GATK4_COLLECTREADCOUNTS": { - "gatk4": "4.6.1.0" + "gatk4": "4.6.2.0" }, "GATK4_DETERMINEGERMLINECONTIGPLOIDY": { - "gatk4": "4.6.1.0" + "gatk4": "4.6.2.0" }, "GATK4_FILTERINTERVALS": { - "gatk4": "4.6.1.0" + "gatk4": "4.6.2.0" }, "GATK4_GERMLINECNVCALLER": { - "gatk4": "4.6.1.0" + "gatk4": "4.6.2.0" }, "GATK4_INTERVALLISTTOOLS": { - "gatk4": "4.6.1.0" + "gatk4": "4.6.2.0" }, "GATK4_PREPROCESSINTERVALS": { - "gatk4": "4.6.1.0" + "gatk4": "4.6.2.0" } }, [ @@ -135,9 +134,9 @@ "germlinecnvcaller/readcounts/sample2.hdf5", "multiqc", "multiqc/multiqc_data", - "multiqc/multiqc_data/BETA-multiqc.parquet", "multiqc/multiqc_data/llms-full.txt", "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", "multiqc/multiqc_data/multiqc_citations.txt", "multiqc/multiqc_data/multiqc_data.json", "multiqc/multiqc_data/multiqc_software_versions.txt", @@ -173,9 +172,9 @@ ] ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.0" }, - "timestamp": "2025-07-10T12:54:19.216989849" + "timestamp": "2025-10-31T15:58:51.382285872" } } \ No newline at end of file From fad819a7821b8ec5448e80d7de69714510fa3ddc Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 31 Oct 2025 16:24:47 +0100 Subject: [PATCH 228/234] LS --- .../nf-core/bam_create_som_pon_gatk/main.nf | 68 +++++++++---------- 1 file changed, 33 insertions(+), 35 
deletions(-) diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf b/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf index bc1cd07..f219845 100644 --- a/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf @@ -2,28 +2,27 @@ // Run GATK mutect2, genomicsdbimport and createsomaticpanelofnormals // -include { GATK4_MUTECT2 } from '../../../modules/nf-core/gatk4/mutect2/main' -include { GATK4_GENOMICSDBIMPORT } from '../../../modules/nf-core/gatk4/genomicsdbimport/main' -include { GATK4_CREATESOMATICPANELOFNORMALS } from '../../../modules/nf-core/gatk4/createsomaticpanelofnormals/main' +include { GATK4_CREATESOMATICPANELOFNORMALS } from '../../../modules/nf-core/gatk4/createsomaticpanelofnormals' +include { GATK4_GENOMICSDBIMPORT } from '../../../modules/nf-core/gatk4/genomicsdbimport' +include { GATK4_MUTECT2 } from '../../../modules/nf-core/gatk4/mutect2' workflow BAM_CREATE_SOM_PON_GATK { take: - ch_mutect2_in // channel: [ val(meta), path(input), path(input_index), path(interval_file) ] - ch_fasta // channel: [ val(meta), path(fasta) ] - ch_fai // channel: [ val(meta), path(fai) ] - ch_dict // channel: [ val(meta), path(dict) ] - val_pon_norm // string: name for panel of normals - ch_gendb_intervals // channel: [ path(interval_file) ] + ch_mutect2_in // channel: [ val(meta), path(input), path(input_index), path(interval_file) ] + ch_fasta // channel: [ val(meta), path(fasta) ] + ch_fai // channel: [ val(meta), path(fai), path(gzi) ] + ch_dict // channel: [ val(meta), path(dict) ] + val_pon_norm // string: name for panel of normals + ch_gendb_intervals // channel: [ path(interval_file) ] main: - ch_versions = Channel.empty() - ch_input = ch_mutect2_in + ch_versions = channel.empty() // - // Perform variant calling for each sample using mutect2 module in panel of normals mode. 
+ // Perform variant calling for each sample using mutect2 module in panel of normals mode // - GATK4_MUTECT2 ( - ch_input, + GATK4_MUTECT2( + ch_mutect2_in, ch_fasta, ch_fai, ch_dict, @@ -32,40 +31,39 @@ workflow BAM_CREATE_SOM_PON_GATK { [], [], [], - [] + [], ) - ch_versions = ch_versions.mix(GATK4_MUTECT2.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_MUTECT2.out.versions) // - // Convert all sample vcfs into a genomicsdb workspace using genomicsdbimport. + // Convert all sample vcfs into a genomicsdb workspace using genomicsdbimport // - ch_vcf = GATK4_MUTECT2.out.vcf.collect{it[1]}.toList() - ch_index = GATK4_MUTECT2.out.tbi.collect{it[1]}.toList() - ch_dict_gendb = ch_dict.map{meta, dict -> return dict}.toList() + ch_vcf = GATK4_MUTECT2.out.vcf.collect { _meta, vcf -> [vcf] }.toList() + ch_index = GATK4_MUTECT2.out.tbi.collect { _meta, tbi -> [tbi] }.toList() + ch_dict_gendb = ch_dict.map { _meta, dict -> [dict] }.toList() - ch_gendb_input = Channel.of([id:val_pon_norm]) + ch_gendb_input = channel.of([id: val_pon_norm]) .combine(ch_vcf) .combine(ch_index) .combine(ch_gendb_intervals) .combine(ch_dict_gendb) - .map{meta, vcf, tbi, interval, dict -> [meta, vcf, tbi, interval, [], dict]} + .map { meta, vcf, tbi, interval, dict -> [meta, vcf, tbi, interval, [], dict] } - GATK4_GENOMICSDBIMPORT ( ch_gendb_input, false, false, false ) - ch_versions = ch_versions.mix(GATK4_GENOMICSDBIMPORT.out.versions.first()) + GATK4_GENOMICSDBIMPORT(ch_gendb_input, false, false, false) + ch_versions = ch_versions.mix(GATK4_GENOMICSDBIMPORT.out.versions) // - //Panel of normals made from genomicsdb workspace using createsomaticpanelofnormals. 
+ // Panel of normals made from genomicsdb workspace using createsomaticpanelofnormals // - GATK4_CREATESOMATICPANELOFNORMALS ( GATK4_GENOMICSDBIMPORT.out.genomicsdb, ch_fasta, ch_fai, ch_dict ) - ch_versions = ch_versions.mix(GATK4_CREATESOMATICPANELOFNORMALS.out.versions.first()) + GATK4_CREATESOMATICPANELOFNORMALS(GATK4_GENOMICSDBIMPORT.out.genomicsdb, ch_fasta, ch_fai.map { meta, fai, _gzi -> [meta, fai] }, ch_dict) + ch_versions = ch_versions.mix(GATK4_CREATESOMATICPANELOFNORMALS.out.versions) emit: - mutect2_vcf = GATK4_MUTECT2.out.vcf // channel: [ val(meta), path(vcf) ] - mutect2_index = GATK4_MUTECT2.out.tbi // channel: [ val(meta), path(tbi) ] - mutect2_stats = GATK4_MUTECT2.out.stats // channel: [ val(meta), path(stats) ] - genomicsdb = GATK4_GENOMICSDBIMPORT.out.genomicsdb // channel: [ val(meta), path(genomicsdb) ] - pon_vcf = GATK4_CREATESOMATICPANELOFNORMALS.out.vcf // channel: [ val(meta), path(vcf) ] - pon_index = GATK4_CREATESOMATICPANELOFNORMALS.out.tbi // channel: [ val(meta), path(tbi) ] - - versions = ch_versions // channel: [ path(versions.yml) ] + mutect2_vcf = GATK4_MUTECT2.out.vcf // channel: [ val(meta), path(vcf) ] + mutect2_index = GATK4_MUTECT2.out.tbi // channel: [ val(meta), path(tbi) ] + mutect2_stats = GATK4_MUTECT2.out.stats // channel: [ val(meta), path(stats) ] + genomicsdb = GATK4_GENOMICSDBIMPORT.out.genomicsdb // channel: [ val(meta), path(genomicsdb) ] + pon_vcf = GATK4_CREATESOMATICPANELOFNORMALS.out.vcf // channel: [ val(meta), path(vcf) ] + pon_index = GATK4_CREATESOMATICPANELOFNORMALS.out.tbi // channel: [ val(meta), path(tbi) ] + versions = ch_versions // channel: [ path(versions.yml) ] } From 96facf3c454a43a07b9f2e80bd8edb10b0ad6bdb Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 31 Oct 2025 16:48:58 +0100 Subject: [PATCH 229/234] fix tuple cardinality warning --- modules.json | 6 +++--- .../nf-core/gatk4/genomicsdbimport/main.nf | 2 +- modules/nf-core/gatk4/mutect2/main.nf | 2 +- 
.../nf-core/bam_create_som_pon_gatk/main.nf | 20 +++++++++---------- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/modules.json b/modules.json index 1983e31..f3f8761 100644 --- a/modules.json +++ b/modules.json @@ -52,7 +52,7 @@ }, "gatk4/genomicsdbimport": { "branch": "master", - "git_sha": "ae8cd884f895585c6799ab4eb6a2c9f44df03336", + "git_sha": "6baf1c4cb0e05d7167c3d9280a03972f7fcbbbcb", "installed_by": ["bam_create_som_pon_gatk", "modules"] }, "gatk4/germlinecnvcaller": { @@ -72,7 +72,7 @@ }, "gatk4/mutect2": { "branch": "master", - "git_sha": "59a5cb7332c37182afa246a2ffdbdeb7cc4e6b75", + "git_sha": "6baf1c4cb0e05d7167c3d9280a03972f7fcbbbcb", "installed_by": ["bam_create_som_pon_gatk", "modules"] }, "gatk4/preprocessintervals": { @@ -111,7 +111,7 @@ "nf-core": { "bam_create_som_pon_gatk": { "branch": "master", - "git_sha": "59a5cb7332c37182afa246a2ffdbdeb7cc4e6b75", + "git_sha": "6baf1c4cb0e05d7167c3d9280a03972f7fcbbbcb", "installed_by": ["subworkflows"] }, "utils_nextflow_pipeline": { diff --git a/modules/nf-core/gatk4/genomicsdbimport/main.nf b/modules/nf-core/gatk4/genomicsdbimport/main.nf index 0599567..0b3341b 100644 --- a/modules/nf-core/gatk4/genomicsdbimport/main.nf +++ b/modules/nf-core/gatk4/genomicsdbimport/main.nf @@ -27,7 +27,7 @@ process GATK4_GENOMICSDBIMPORT { prefix = task.ext.prefix ?: "${meta.id}" // settings for running default create gendb mode - input_command = input_map ? "--sample-name-map ${vcf[0]}" : vcf.collect { "--variant ${it}" }.join(' ') + input_command = input_map ? "--sample-name-map ${vcf[0]}" : vcf.collect { vcf_ -> "--variant ${vcf_}" }.join(' ') genomicsdb_command = "--genomicsdb-workspace-path ${prefix}" interval_command = interval_file ? 
"--intervals ${interval_file}" : "--intervals ${interval_value}" diff --git a/modules/nf-core/gatk4/mutect2/main.nf b/modules/nf-core/gatk4/mutect2/main.nf index 24c4e22..f8d3b38 100644 --- a/modules/nf-core/gatk4/mutect2/main.nf +++ b/modules/nf-core/gatk4/mutect2/main.nf @@ -32,7 +32,7 @@ process GATK4_MUTECT2 { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def inputs = input.collect { "--input ${it}" }.join(" ") + def inputs = input.collect { vcf_ -> "--input ${vcf_}" }.join(" ") def interval_command = intervals ? "--intervals ${intervals}" : "" def pon_command = panel_of_normals ? "--panel-of-normals ${panel_of_normals}" : "" def gr_command = germline_resource ? "--germline-resource ${germline_resource}" : "" diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf b/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf index f219845..f700c9f 100644 --- a/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf @@ -8,11 +8,11 @@ include { GATK4_MUTECT2 } from '../../../modules/nf-core/gat workflow BAM_CREATE_SOM_PON_GATK { take: - ch_mutect2_in // channel: [ val(meta), path(input), path(input_index), path(interval_file) ] - ch_fasta // channel: [ val(meta), path(fasta) ] - ch_fai // channel: [ val(meta), path(fai), path(gzi) ] - ch_dict // channel: [ val(meta), path(dict) ] - val_pon_norm // string: name for panel of normals + ch_mutect2_in // channel: [ val(meta), path(input), path(input_index), path(interval_file) ] + ch_fasta // channel: [ val(meta), path(fasta) ] + ch_fai // channel: [ val(meta), path(fai), path(gzi) ] + ch_dict // channel: [ val(meta), path(dict) ] + val_pon_norm // string: name for panel of normals ch_gendb_intervals // channel: [ path(interval_file) ] main: @@ -59,11 +59,11 @@ workflow BAM_CREATE_SOM_PON_GATK { ch_versions = ch_versions.mix(GATK4_CREATESOMATICPANELOFNORMALS.out.versions) emit: - mutect2_vcf = GATK4_MUTECT2.out.vcf // 
channel: [ val(meta), path(vcf) ] - mutect2_index = GATK4_MUTECT2.out.tbi // channel: [ val(meta), path(tbi) ] - mutect2_stats = GATK4_MUTECT2.out.stats // channel: [ val(meta), path(stats) ] - genomicsdb = GATK4_GENOMICSDBIMPORT.out.genomicsdb // channel: [ val(meta), path(genomicsdb) ] + mutect2_vcf = GATK4_MUTECT2.out.vcf // channel: [ val(meta), path(vcf) ] + mutect2_index = GATK4_MUTECT2.out.tbi // channel: [ val(meta), path(tbi) ] + mutect2_stats = GATK4_MUTECT2.out.stats // channel: [ val(meta), path(stats) ] + genomicsdb = GATK4_GENOMICSDBIMPORT.out.genomicsdb // channel: [ val(meta), path(genomicsdb) ] pon_vcf = GATK4_CREATESOMATICPANELOFNORMALS.out.vcf // channel: [ val(meta), path(vcf) ] pon_index = GATK4_CREATESOMATICPANELOFNORMALS.out.tbi // channel: [ val(meta), path(tbi) ] - versions = ch_versions // channel: [ path(versions.yml) ] + versions = ch_versions // channel: [ path(versions.yml) ] } From c6a403df89c5c14d4aff8e59ec07cc1b38490899 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 5 Nov 2025 12:10:25 +0100 Subject: [PATCH 230/234] test_full --- conf/test_full.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_full.config b/conf/test_full.config index 860f66b..5ef118b 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,7 +17,7 @@ params { // Input data for full size test // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = "https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/NA12878_Agilent_full_test.csv" + input = "https://raw.githubusercontent.com/nf-core/test-datasets/createpanelrefs/csv/1.0/recal_cram_sarek.csv" // Genome references genome = 'GATK.GRCh38' From 3538a4f7ae163d158dbcec93e56b67acce0b46ba Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 5 Nov 2025 12:19:02 +0100 Subject: [PATCH 231/234] update CHANGELOG --- CHANGELOG.md | 1 + conf/test_full.config | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ee5e536..c05d374 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#10](https://github.com/nf-core/createpanelrefs/pull/10) - `GATK germlinecnvcaller` can be used to create a PON - [#17](https://github.com/nf-core/createpanelrefs/pull/17) - `GENS` can be used to create a PON - [#50](https://github.com/nf-core/createpanelrefs/pull/50) - Add auto creation of interval_list file from gens, and bed file for mutect2 +- [#62](https://github.com/nf-core/createpanelrefs/pull/62) - Add megatests ### `Updated` diff --git a/conf/test_full.config b/conf/test_full.config index 5ef118b..358564c 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -18,6 +18,7 @@ params { // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) // TODO nf-core: Give any required params for the test so that command line flags are not needed input = "https://raw.githubusercontent.com/nf-core/test-datasets/createpanelrefs/csv/1.0/recal_cram_sarek.csv" + tools = "cnvkit,germlinecnvcaller,gens,mutect2" // Genome references genome = 'GATK.GRCh38' From a1460c17465b40536675ff8b1da08c50d5ed2b90 Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Wed, 5 Nov 2025 14:47:29 +0100 Subject: [PATCH 232/234] Important! Template update for nf-core/tools v3.5.0dev (#63) * Template update for nf-core/tools version 3.5.0.dev0 * fix merge conflicts * fix merge conflicts * fix subworkflows install * Update CITATIONS.md * update CHANGELOG --- .github/workflows/release-announcements.yml | 3 - .prettierignore | 2 + CHANGELOG.md | 1 + README.md | 2 +- assets/multiqc_config.yml | 7 +- assets/schema_input.json | 4 +- conf/test_full.config | 2 - nextflow.config | 12 +- nextflow_schema.json | 262 +++++++++--------- ro-crate-metadata.json | 42 ++- .../main.nf | 8 +- 11 files changed, 179 insertions(+), 166 deletions(-) diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index 039f5e8..431d3d4 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -18,7 +18,6 @@ jobs: id: get_description run: | echo "description=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .description')" >> $GITHUB_OUTPUT - - uses: rzr/fediverse-action@master with: access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} @@ -27,9 +26,7 @@ jobs: # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release message: | Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
- ${{ steps.get_description.outputs.description }} - Please see the changelog: ${{ github.event.release.html_url }} ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics diff --git a/.prettierignore b/.prettierignore index 2255e3e..dd749d4 100644 --- a/.prettierignore +++ b/.prettierignore @@ -12,3 +12,5 @@ testing* bin/ .nf-test/ ro-crate-metadata.json +modules/nf-core/ +subworkflows/nf-core/ diff --git a/CHANGELOG.md b/CHANGELOG.md index c05d374..8e8b164 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#54](https://github.com/nf-core/createpanelrefs/pull/54) - Template update for nf-core/tools v3.3.1 - [#54](https://github.com/nf-core/createpanelrefs/pull/54) - Update nft-utils to 0.0.4 - [#55](https://github.com/nf-core/createpanelrefs/pull/55) - Prepare relase 1.0.0 +- [#63](https://github.com/nf-core/createpanelrefs/pull/63) - Template update for nf-core/tools v3.5.0dev ### `Fixed` diff --git a/README.md b/README.md index 8894cde..c1e5512 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ -[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new/nf-core/createpanelrefs) +[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/createpanelrefs) [![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml) [![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index cefa702..a905e53 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,8 +1,7 @@ report_comment: > - This report has been generated by the nf-core/createpanelrefs analysis pipeline. For information about - how to interpret these results, please see the documentation. + This report has been generated by the nf-core/createpanelrefs + analysis pipeline. For information about how to interpret these results, please see the + documentation. report_section_order: "nf-core-createpanelrefs-methods-description": order: -1000 diff --git a/assets/schema_input.json b/assets/schema_input.json index 40d0b50..6bebae0 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,8 +10,8 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "meta": ["id"], - "errorMessage": "Sample name must be a string has to be provided and cannot contain spaces" + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"] }, "bam": { "errorMessage": "BAM file cannot contain spaces, has to exist and must have extension '.bam'", diff --git a/conf/test_full.config b/conf/test_full.config index 358564c..4120344 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,8 +15,6 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = "https://raw.githubusercontent.com/nf-core/test-datasets/createpanelrefs/csv/1.0/recal_cram_sarek.csv" tools = "cnvkit,germlinecnvcaller,gens,mutect2" diff --git a/nextflow.config b/nextflow.config index 3d1ef34..2fcd3c2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -67,13 +67,13 @@ params { config_profile_name = null config_profile_description = null - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Schema validation default options - validate_params = true + validate_params = true } // Load base.config by default for all pipelines @@ -193,8 +193,6 @@ profiles { test_full { includeConfig 'conf/test_full.config' } } -// Set AWS client to anonymous when using the default igenomes_base -aws.client.anonymous = !params.igenomes_ignore && params.igenomes_base?.startsWith('s3://ngi-igenomes/igenomes/') ?: false // Load nf-core custom profiles from different institutions // If params.custom_config_base is set AND either the NXF_OFFLINE environment variable is not set or params.custom_config_base is a local path, the nfcore_custom.config file from the specified base path is included. 
diff --git a/nextflow_schema.json b/nextflow_schema.json index f62e0e4..0b43a3f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -5,119 +5,6 @@ "description": "Generate Panel of Normals, models or other similar references from lots of samples", "type": "object", "$defs": { - "germlinecnvcaller_options": { - "title": "Germlinecnvcaller options", - "type": "object", - "description": "Options used by the germlinecnvcaller subworkflow", - "default": "", - "properties": { - "gcnv_analysis_type": { - "type": "string", - "default": "wgs", - "description": "Specifies which analysis type for the pipeline- either 'wgs' or 'wes'.", - "fa_icon": "fas fa-align-center", - "enum": ["wgs", "wes"] - }, - "gcnv_bin_length": { - "type": "number", - "default": 1000, - "description": "Length (in bp) of the bins. If zero, no binning will be performed.", - "fa_icon": "fas fa-sort-numeric-down", - "help_text": "Used by GATK's PreprocessIntervals. GATK recommends a bin length of 1000 for WGS analysis, and 0 for WES analysis. " - }, - "gcnv_padding": { - "type": "number", - "description": "Length (in bp) of the padding regions on each side of the intervals.", - "default": 0, - "fa_icon": "fas fa-sort-numeric-down", - "help_text": "Used by GATK's PreprocessIntervals. GATK recommends a padding of 0 for WGS analysis, and 250 for WES analysis." - }, - "gcnv_model_name": { - "type": "string", - "description": "Name for panel of normals.", - "default": "germlinecnvcaller" - }, - "gcnv_readcount_format": { - "type": "string", - "description": "Output file format for count data", - "default": "HDF5", - "fa_icon": "fas fa-align-left", - "enum": ["HDF5", "TSV"] - }, - "gcnv_scatter_content": { - "type": "number", - "description": "When scattering with this argument, each of the resultant files will (ideally) have this amount of interval-counts.", - "default": 5000, - "fa_icon": "fas fa-sort-numeric-down", - "help_text": "Used by GATK's IntervalListTools." 
- } - } - }, - "gens_options": { - "title": "GENS options", - "type": "object", - "description": "Options used by the gens subworkflow", - "default": "", - "properties": { - "gens_bin_length": { - "type": "number", - "default": 100, - "description": "Length (in bp) of the bins. If zero, no binning will be performed.", - "fa_icon": "fas fa-sort-numeric-down", - "help_text": "Used by GATK's PreprocessIntervals. We recommend a bin length of 100." - }, - "gens_maximum_chunk_size": { - "type": "number", - "default": 167772150, - "description": "Maximum chunk size when writing the HDF5 file" - }, - "gens_min_interval_median_percentile": { - "type": "number", - "default": 5, - "description": "Minimum interval median percentile for gatk CreateReadCountPanelOfNormals", - "help_text": "Genomic intervals with a median (across samples) of fractional coverage (optionally corrected for GC bias) less than or equal to this percentile are filtered out. (This is the first filter applied.)" - }, - "gens_pon_name": { - "type": "string", - "description": "Name for panel of normals.", - "default": "gens" - }, - "gens_readcount_format": { - "type": "string", - "description": "Output file format for count data", - "default": "HDF5", - "fa_icon": "fas fa-align-left", - "enum": ["HDF5", "TSV"] - } - } - }, - "cnvkit_options": { - "title": "CNVkit options", - "type": "object", - "description": "Options used by the cnvkit subworkflow", - "default": "", - "properties": { - "cnvkit_targets": { - "type": "string", - "format": "path", - "fa_icon": "fas fa-file", - "description": "Path to directory for target file.", - "help_text": "Specify the path to the target file for CNVkit." - } - } - }, - "mutect2_options": { - "title": "Mutect2 options", - "type": "object", - "description": "Options used by the mutect2 subworkflow", - "default": "", - "properties": { - "mutect2_pon_name": { - "type": "string", - "description": "Name for panel of normals." 
- } - } - }, "input_output_options": { "title": "Input/output options", "type": "object", @@ -126,23 +13,15 @@ "required": ["input", "outdir"], "properties": { "input": { + "type": "string", + "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "A design file with information about the samples in your experiment. Use this parameter to specify the location of the input files. It has to be a comma-separated file with a header row. See [usage docs](https://nf-co.re/sarek/usage#input).\n\nIf no input file is specified, sarek will attempt to locate one in the `{outdir}` directory.", - "fa_icon": "fas fa-file-csv", - "schema": "assets/schema_input.json", - "anyOf": [ - { - "type": "string", - "format": "file-path", - "exists": true, - "mimetype": "text/csv", - "pattern": "^\\S+\\.csv$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "fa_icon": "fas fa-file-csv" }, "outdir": { "type": "string", @@ -317,6 +196,119 @@ } } }, + "cnvkit_options": { + "title": "CNVkit options", + "type": "object", + "description": "Options used by the cnvkit subworkflow", + "default": "", + "properties": { + "cnvkit_targets": { + "type": "string", + "format": "path", + "fa_icon": "fas fa-file", + "description": "Path to directory for target file.", + "help_text": "Specify the path to the target file for CNVkit." + } + } + }, + "gens_options": { + "title": "GENS options", + "type": "object", + "description": "Options used by the gens subworkflow", + "default": "", + "properties": { + "gens_bin_length": { + "type": "number", + "default": 100, + "description": "Length (in bp) of the bins. If zero, no binning will be performed.", + "fa_icon": "fas fa-sort-numeric-down", + "help_text": "Used by GATK's PreprocessIntervals. We recommend a bin length of 100." 
+ }, + "gens_maximum_chunk_size": { + "type": "number", + "default": 167772150, + "description": "Maximum chunk size when writing the HDF5 file" + }, + "gens_min_interval_median_percentile": { + "type": "number", + "default": 5, + "description": "Minimum interval median percentile for gatk CreateReadCountPanelOfNormals", + "help_text": "Genomic intervals with a median (across samples) of fractional coverage (optionally corrected for GC bias) less than or equal to this percentile are filtered out. (This is the first filter applied.)" + }, + "gens_pon_name": { + "type": "string", + "description": "Name for panel of normals.", + "default": "gens" + }, + "gens_readcount_format": { + "type": "string", + "description": "Output file format for count data", + "default": "HDF5", + "fa_icon": "fas fa-align-left", + "enum": ["HDF5", "TSV"] + } + } + }, + "germlinecnvcaller_options": { + "title": "Germlinecnvcaller options", + "type": "object", + "description": "Options used by the germlinecnvcaller subworkflow", + "default": "", + "properties": { + "gcnv_analysis_type": { + "type": "string", + "default": "wgs", + "description": "Specifies which analysis type for the pipeline- either 'wgs' or 'wes'.", + "fa_icon": "fas fa-align-center", + "enum": ["wgs", "wes"] + }, + "gcnv_bin_length": { + "type": "number", + "default": 1000, + "description": "Length (in bp) of the bins. If zero, no binning will be performed.", + "fa_icon": "fas fa-sort-numeric-down", + "help_text": "Used by GATK's PreprocessIntervals. GATK recommends a bin length of 1000 for WGS analysis, and 0 for WES analysis. " + }, + "gcnv_padding": { + "type": "number", + "description": "Length (in bp) of the padding regions on each side of the intervals.", + "default": 0, + "fa_icon": "fas fa-sort-numeric-down", + "help_text": "Used by GATK's PreprocessIntervals. GATK recommends a padding of 0 for WGS analysis, and 250 for WES analysis." 
+ }, + "gcnv_model_name": { + "type": "string", + "description": "Name for panel of normals.", + "default": "germlinecnvcaller" + }, + "gcnv_readcount_format": { + "type": "string", + "description": "Output file format for count data", + "default": "HDF5", + "fa_icon": "fas fa-align-left", + "enum": ["HDF5", "TSV"] + }, + "gcnv_scatter_content": { + "type": "number", + "description": "When scattering with this argument, each of the resultant files will (ideally) have this amount of interval-counts.", + "default": 5000, + "fa_icon": "fas fa-sort-numeric-down", + "help_text": "Used by GATK's IntervalListTools." + } + } + }, + "mutect2_options": { + "title": "Mutect2 options", + "type": "object", + "description": "Options used by the mutect2 subworkflow", + "default": "", + "properties": { + "mutect2_pon_name": { + "type": "string", + "description": "Name for panel of normals." + } + } + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -477,25 +469,25 @@ }, "allOf": [ { - "$ref": "#/$defs/germlinecnvcaller_options" + "$ref": "#/$defs/input_output_options" }, { - "$ref": "#/$defs/gens_options" + "$ref": "#/$defs/main_options" }, { - "$ref": "#/$defs/cnvkit_options" + "$ref": "#/$defs/reference_genome_options" }, { - "$ref": "#/$defs/mutect2_options" + "$ref": "#/$defs/cnvkit_options" }, { - "$ref": "#/$defs/input_output_options" + "$ref": "#/$defs/gens_options" }, { - "$ref": "#/$defs/main_options" + "$ref": "#/$defs/germlinecnvcaller_options" }, { - "$ref": "#/$defs/reference_genome_options" + "$ref": "#/$defs/mutect2_options" }, { "$ref": "#/$defs/institutional_config_options" diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index c1d43bf..792448e 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -22,8 +22,8 @@ "@id": "./", "@type": "Dataset", "creativeWorkStatus": "Stable", - "datePublished": "2025-10-31T11:45:59+00:00", - "description": "

    \n \n \n \"nf-core/createpanelrefs\"\n \n

    \n\n[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new/nf-core/createpanelrefs)\n[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.4.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.4.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/createpanelrefs)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/createpanelrefs** is a bioinformatics helper pipeline that will help in creating panel of normals and other models.\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n2. Build Panel of Normals for [`CNVKIT`](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004873)\n3. Build ploidy and cnv calling models for [`GATK's germlinecnvcaller workflow`](https://genome.cshlp.org/content/20/9/1297)\n4. Build Panel of Normals for [`GENS`](https://github.com/Clinical-Genomics-Lund/gens)\n5. Build Panel of Normals for [`Mutect2`](https://genome.cshlp.org/content/20/9/1297)\n6. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,bam,bai,cram,crai\nsample1,sample1.bam,sample1.bai,,\nsample2,sample2.bam,,,\nsample3,sample3.bam,sample3.bai,,\nsample4,sample4.bam,,,\n```\n\nEach row in the samplesheet represents an alignment file, and it is important that you provide the files in the right format for the analysis you want to run.\n\n| Tool | Alignment format |\n| ----------------- | ---------------------------- |\n| cnvkit | bam |\n| germlinecnvcaller | bam or cram or a mix of both |\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/createpanelrefs \\\n -profile \\\n --input samplesheet.csv \\\n --tools \\\n --genome GATK.GRCh38 \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/createpanelrefs/usage) and the [parameter documentation](https://nf-co.re/createpanelrefs/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/createpanelrefs/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/createpanelrefs/output).\n\n## Credits\n\nnf-core/createpanelrefs was originally written by @maxulysse.\n@marrip contributed in the idea that started it all.\n@matthdsm and @FriederikeHanssen contributed in the actual design.\n@ramprasadn's interest was the final push that led to the creation.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @jfy133\n- @JoseEspinosa\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#createpanelrefs` channel](https://nfcore.slack.com/channels/createpanelrefs) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven 
Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "datePublished": "2025-11-05T12:59:18+00:00", + "description": "

    \n \n \n \"nf-core/createpanelrefs\"\n \n

    \n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/createpanelrefs)\n[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.4.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.4.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/createpanelrefs)\n\n[![Get help 
on Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/createpanelrefs** is a bioinformatics helper pipeline that will help in creating panel of normals and other models.\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n2. Build Panel of Normals for [`CNVKIT`](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004873)\n3. Build ploidy and cnv calling models for [`GATK's germlinecnvcaller workflow`](https://genome.cshlp.org/content/20/9/1297)\n4. Build Panel of Normals for [`GENS`](https://github.com/Clinical-Genomics-Lund/gens)\n5. Build Panel of Normals for [`Mutect2`](https://genome.cshlp.org/content/20/9/1297)\n6. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,bam,bai,cram,crai\nsample1,sample1.bam,sample1.bai,,\nsample2,sample2.bam,,,\nsample3,sample3.bam,sample3.bai,,\nsample4,sample4.bam,,,\n```\n\nEach row in the samplesheet represents an alignment file, and it is important that you provide the files in the right format for the analysis you want to run.\n\n| Tool | Alignment format |\n| ----------------- | ---------------------------- |\n| cnvkit | bam |\n| germlinecnvcaller | bam or cram or a mix of both |\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/createpanelrefs \\\n -profile \\\n --input samplesheet.csv \\\n --tools \\\n --genome GATK.GRCh38 \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/createpanelrefs/usage) and the [parameter documentation](https://nf-co.re/createpanelrefs/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/createpanelrefs/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/createpanelrefs/output).\n\n## Credits\n\nnf-core/createpanelrefs was originally written by @maxulysse.\n@marrip contributed in the idea that started it all.\n@matthdsm and @FriederikeHanssen contributed in the actual design.\n@ramprasadn's interest was the final push that led to the creation.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @jfy133\n- @JoseEspinosa\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#createpanelrefs` channel](https://nfcore.slack.com/channels/createpanelrefs) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven 
Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -99,7 +99,7 @@ }, "mentions": [ { - "@id": "#057dabfd-116c-4959-b179-c9a8f8fae4a4" + "@id": "#a714a851-2f51-4b56-a0cf-a3dab3236531" } ], "name": "nf-core/createpanelrefs" @@ -126,8 +126,16 @@ "SoftwareSourceCode", "ComputationalWorkflow" ], + "creator": [ + { + "@id": "#max.u.garcia@gmail.com" + }, + { + "@id": "https://orcid.org/0000-0001-7313-3734" + } + ], "dateCreated": "", - "dateModified": "2025-10-31T12:45:59Z", + "dateModified": "2025-11-05T13:59:18Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", @@ -136,6 +144,14 @@ "license": [ "MIT" ], + "maintainer": [ + { + "@id": "#max.u.garcia@gmail.com" + }, + { + "@id": "https://orcid.org/0000-0001-7313-3734" + } + ], "name": [ "nf-core/createpanelrefs" ], @@ -166,11 +182,11 @@ "version": "!>=25.04.0" }, { - "@id": "#057dabfd-116c-4959-b179-c9a8f8fae4a4", + "@id": "#a714a851-2f51-4b56-a0cf-a3dab3236531", "@type": "TestSuite", "instance": [ { - "@id": "#0f242615-fe21-499a-a635-ad8aa90acf69" + "@id": "#9366c469-6a62-401f-8e9e-e2767d12899f" } ], "mainEntity": { @@ -179,7 +195,7 @@ "name": "Test suite for nf-core/createpanelrefs" }, { - "@id": "#0f242615-fe21-499a-a635-ad8aa90acf69", + "@id": "#9366c469-6a62-401f-8e9e-e2767d12899f", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/createpanelrefs", "resource": "repos/nf-core/createpanelrefs/actions/workflows/nf-test.yml", @@ -306,6 +322,18 @@ "@type": "Organization", "name": "nf-core", "url": "https://nf-co.re/" + }, + { + "@id": "#max.u.garcia@gmail.com", + "@type": "Person", + "email": "max.u.garcia@gmail.com", + "name": "Maxime U Garcia" + }, + { + "@id": "https://orcid.org/0000-0001-7313-3734", + "@type": "Person", + "email": "20065894+ramprasadn@users.noreply.github.com", + "name": "Ramprasad Neethiraj" } ] 
} \ No newline at end of file diff --git a/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf b/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf index 47125b3..505b582 100644 --- a/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf @@ -97,10 +97,10 @@ workflow PIPELINE_INITIALISATION { validateInputParameters() // - // Create channel from input file provided through params.input + // Create channel from input file provided through input // - ch_samplesheet = channel.fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + ch_samplesheet = channel.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input.json")) emit: samplesheet = ch_samplesheet @@ -196,7 +196,6 @@ def toolCitationText() { // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ "Tools used in the workflow included:", - "FastQC (Andrews 2010),", "MultiQC (Ewels et al. 2016)", ".", ].join(' ').trim() @@ -209,8 +208,7 @@ def toolBibliographyText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ - "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", - "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • ", + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " ].join(' ').trim() return reference_text From ab14ab0ba9138ca8cc43fde611b5b02c48364c2c Mon Sep 17 00:00:00 2001 From: nf-core bot Date: Thu, 20 Nov 2025 14:50:42 +0100 Subject: [PATCH 233/234] Template update for nf-core/tools version 3.5.1 (#64) Co-authored-by: maxulysse --- .github/workflows/awsfulltest.yml | 2 +- .github/workflows/awstest.yml | 2 +- .github/workflows/download_pipeline.yml | 2 +- .github/workflows/fix_linting.yml | 2 +- .github/workflows/linting.yml | 6 +-- .github/workflows/nf-test.yml | 4 +- .../workflows/template-version-comment.yml | 2 +- .nf-core.yml | 2 +- README.md | 2 +- modules.json | 2 +- ro-crate-metadata.json | 42 ++++--------------- .../main.nf | 2 +- .../nf-core/utils_nfcore_pipeline/main.nf | 2 +- 13 files changed, 22 insertions(+), 50 deletions(-) diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 1ec2575..213c297 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -37,7 +37,7 @@ jobs: } profiles: test_full - - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: Seqera Platform debug log file path: | diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index e2e0879..b3e2d70 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -25,7 +25,7 @@ jobs: } profiles: test - - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: Seqera Platform debug log file path: | diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 6d94bcb..45884ff 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -127,7 +127,7 @@ jobs: fi - name: Upload Nextflow logfile for debugging purposes - uses: 
actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: nextflow_logfile.txt path: .nextflow.log* diff --git a/.github/workflows/fix_linting.yml b/.github/workflows/fix_linting.yml index 321d648..96fc86e 100644 --- a/.github/workflows/fix_linting.yml +++ b/.github/workflows/fix_linting.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: token: ${{ secrets.nf_core_bot_auth_token }} diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 30e6602..7a527a3 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,7 +11,7 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - name: Set up Python 3.14 uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 @@ -28,7 +28,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - name: Install Nextflow uses: nf-core/setup-nextflow@v2 @@ -71,7 +71,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: linting-logs path: | diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index e20bf6d..c98d76e 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -40,7 +40,7 @@ jobs: rm -rf ./* || true rm -rf ./.??* || true 
ls -la ./ - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: fetch-depth: 0 @@ -85,7 +85,7 @@ jobs: TOTAL_SHARDS: ${{ needs.nf-test-changes.outputs.total_shards }} steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: fetch-depth: 0 diff --git a/.github/workflows/template-version-comment.yml b/.github/workflows/template-version-comment.yml index 5c11bb2..dea5dda 100644 --- a/.github/workflows/template-version-comment.yml +++ b/.github/workflows/template-version-comment.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: ref: ${{ github.event.pull_request.head.sha }} diff --git a/.nf-core.yml b/.nf-core.yml index 825bac8..8dff0d0 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -2,7 +2,7 @@ lint: files_exist: - conf/modules.config modules_config: false -nf_core_version: 3.4.1 +nf_core_version: 3.5.1 repository_type: pipeline template: author: "@maxulysse" diff --git a/README.md b/README.md index c1e5512..dd93a3a 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) -[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.4.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.4.1) +[![nf-core template 
version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) diff --git a/modules.json b/modules.json index f3f8761..ef16fa5 100644 --- a/modules.json +++ b/modules.json @@ -121,7 +121,7 @@ }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", + "git_sha": "271e7fc14eb1320364416d996fb077421f3faed2", "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 792448e..446b72d 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -22,8 +22,8 @@ "@id": "./", "@type": "Dataset", "creativeWorkStatus": "Stable", - "datePublished": "2025-11-05T12:59:18+00:00", - "description": "

    \n \n \n \"nf-core/createpanelrefs\"\n \n

    \n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/createpanelrefs)\n[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.4.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.4.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/createpanelrefs)\n\n[![Get help 
on Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/createpanelrefs** is a bioinformatics helper pipeline that will help in creating panel of normals and other models.\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n2. Build Panel of Normals for [`CNVKIT`](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004873)\n3. Build ploidy and cnv calling models for [`GATK's germlinecnvcaller workflow`](https://genome.cshlp.org/content/20/9/1297)\n4. Build Panel of Normals for [`GENS`](https://github.com/Clinical-Genomics-Lund/gens)\n5. Build Panel of Normals for [`Mutect2`](https://genome.cshlp.org/content/20/9/1297)\n6. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,bam,bai,cram,crai\nsample1,sample1.bam,sample1.bai,,\nsample2,sample2.bam,,,\nsample3,sample3.bam,sample3.bai,,\nsample4,sample4.bam,,,\n```\n\nEach row in the samplesheet represents an alignment file, and it is important that you provide the files in the right format for the analysis you want to run.\n\n| Tool | Alignment format |\n| ----------------- | ---------------------------- |\n| cnvkit | bam |\n| germlinecnvcaller | bam or cram or a mix of both |\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/createpanelrefs \\\n -profile \\\n --input samplesheet.csv \\\n --tools \\\n --genome GATK.GRCh38 \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/createpanelrefs/usage) and the [parameter documentation](https://nf-co.re/createpanelrefs/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/createpanelrefs/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/createpanelrefs/output).\n\n## Credits\n\nnf-core/createpanelrefs was originally written by @maxulysse.\n@marrip contributed in the idea that started it all.\n@matthdsm and @FriederikeHanssen contributed in the actual design.\n@ramprasadn's interest was the final push that led to the creation.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @jfy133\n- @JoseEspinosa\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#createpanelrefs` channel](https://nfcore.slack.com/channels/createpanelrefs) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven 
Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "datePublished": "2025-11-20T13:26:32+00:00", + "description": "

    \n \n \n \"nf-core/createpanelrefs\"\n \n

    \n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/createpanelrefs)\n[![GitHub Actions CI Status](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/createpanelrefs/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/createpanelrefs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/createpanelrefs)\n\n[![Get help 
on Slack](http://img.shields.io/badge/slack-nf--core%20%23createpanelrefs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/createpanelrefs)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/createpanelrefs** is a bioinformatics helper pipeline that will help in creating panel of normals and other models.\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))\n2. Build Panel of Normals for [`CNVKIT`](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004873)\n3. Build ploidy and cnv calling models for [`GATK's germlinecnvcaller workflow`](https://genome.cshlp.org/content/20/9/1297)\n4. Build Panel of Normals for [`GENS`](https://github.com/Clinical-Genomics-Lund/gens)\n5. Build Panel of Normals for [`Mutect2`](https://genome.cshlp.org/content/20/9/1297)\n6. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,bam,bai,cram,crai\nsample1,sample1.bam,sample1.bai,,\nsample2,sample2.bam,,,\nsample3,sample3.bam,sample3.bai,,\nsample4,sample4.bam,,,\n```\n\nEach row in the samplesheet represents an alignment file, and it is important that you provide the files in the right format for the analysis you want to run.\n\n| Tool | Alignment format |\n| ----------------- | ---------------------------- |\n| cnvkit | bam |\n| germlinecnvcaller | bam or cram or a mix of both |\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/createpanelrefs \\\n -profile \\\n --input samplesheet.csv \\\n --tools \\\n --genome GATK.GRCh38 \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/createpanelrefs/usage) and the [parameter documentation](https://nf-co.re/createpanelrefs/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/createpanelrefs/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/createpanelrefs/output).\n\n## Credits\n\nnf-core/createpanelrefs was originally written by @maxulysse.\n@marrip contributed in the idea that started it all.\n@matthdsm and @FriederikeHanssen contributed in the actual design.\n@ramprasadn's interest was the final push that led to the creation.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- @jfy133\n- @JoseEspinosa\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#createpanelrefs` channel](https://nfcore.slack.com/channels/createpanelrefs) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven 
Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -99,7 +99,7 @@ }, "mentions": [ { - "@id": "#a714a851-2f51-4b56-a0cf-a3dab3236531" + "@id": "#f7d49604-4335-43dd-a5d4-d461d3d8aba6" } ], "name": "nf-core/createpanelrefs" @@ -126,16 +126,8 @@ "SoftwareSourceCode", "ComputationalWorkflow" ], - "creator": [ - { - "@id": "#max.u.garcia@gmail.com" - }, - { - "@id": "https://orcid.org/0000-0001-7313-3734" - } - ], "dateCreated": "", - "dateModified": "2025-11-05T13:59:18Z", + "dateModified": "2025-11-20T14:26:32Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", @@ -144,14 +136,6 @@ "license": [ "MIT" ], - "maintainer": [ - { - "@id": "#max.u.garcia@gmail.com" - }, - { - "@id": "https://orcid.org/0000-0001-7313-3734" - } - ], "name": [ "nf-core/createpanelrefs" ], @@ -182,11 +166,11 @@ "version": "!>=25.04.0" }, { - "@id": "#a714a851-2f51-4b56-a0cf-a3dab3236531", + "@id": "#f7d49604-4335-43dd-a5d4-d461d3d8aba6", "@type": "TestSuite", "instance": [ { - "@id": "#9366c469-6a62-401f-8e9e-e2767d12899f" + "@id": "#02ac16d0-42cf-4dcd-98f4-65b685078d57" } ], "mainEntity": { @@ -195,7 +179,7 @@ "name": "Test suite for nf-core/createpanelrefs" }, { - "@id": "#9366c469-6a62-401f-8e9e-e2767d12899f", + "@id": "#02ac16d0-42cf-4dcd-98f4-65b685078d57", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/createpanelrefs", "resource": "repos/nf-core/createpanelrefs/actions/workflows/nf-test.yml", @@ -322,18 +306,6 @@ "@type": "Organization", "name": "nf-core", "url": "https://nf-co.re/" - }, - { - "@id": "#max.u.garcia@gmail.com", - "@type": "Person", - "email": "max.u.garcia@gmail.com", - "name": "Maxime U Garcia" - }, - { - "@id": "https://orcid.org/0000-0001-7313-3734", - "@type": "Person", - "email": "20065894+ramprasadn@users.noreply.github.com", - "name": "Ramprasad Neethiraj" } ] 
} \ No newline at end of file diff --git a/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf b/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf index 505b582..7ec8e41 100644 --- a/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_createpanelrefs_pipeline/main.nf @@ -63,7 +63,7 @@ workflow PIPELINE_INITIALISATION { \033[0;35m nf-core/createpanelrefs ${workflow.manifest.version}\033[0m -\033[2m----------------------------------------------------\033[0m- """ - after_text = """${workflow.manifest.doi ? "\n* The pipeline\n" : ""}${workflow.manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/', '')}" }.join("\n")}${workflow.manifest.doi ? "\n" : ""} + after_text = """${workflow.manifest.doi ? "\n* The pipeline\n" : ""}${workflow.manifest.doi.tokenize(",").collect { doi -> " https://doi.org/${doi.trim().replace('https://doi.org/','')}"}.join("\n")}${workflow.manifest.doi ? 
"\n" : ""} * The nf-core framework https://doi.org/10.1038/s41587-020-0439-x diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index bfd2587..2f30e9a 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -98,7 +98,7 @@ def workflowVersionToYAML() { // Get channel of software versions used in pipeline in YAML format // def softwareVersionsToYAML(ch_versions) { - return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML())) + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(channel.of(workflowVersionToYAML())) } // From 1168e3fd768e5fa5898f8ef27628e8806cccf947 Mon Sep 17 00:00:00 2001 From: Felix Lenner <52530259+fellen31@users.noreply.github.com> Date: Wed, 11 Feb 2026 15:07:44 +0100 Subject: [PATCH 234/234] Merge pull request #66 from nf-core/lrs-gens-pon Long-read GENS PON --- CHANGELOG.md | 2 + conf/modules/gens_pon.config | 66 +- docs/output.md | 3 + docs/usage.md | 4 + main.nf | 4 +- modules.json | 10 + modules/nf-core/cat/cat/environment.yml | 7 + modules/nf-core/cat/cat/main.nf | 78 + modules/nf-core/cat/cat/meta.yml | 63 + modules/nf-core/cat/cat/tests/main.nf.test | 192 +++ .../nf-core/cat/cat/tests/main.nf.test.snap | 283 ++++ modules/nf-core/cat/cat/tests/nextflow.config | 6 + modules/nf-core/mosdepth/environment.yml | 9 + modules/nf-core/mosdepth/main.nf | 77 + modules/nf-core/mosdepth/meta.yml | 211 +++ modules/nf-core/mosdepth/tests/main.nf.test | 268 +++ .../nf-core/mosdepth/tests/main.nf.test.snap | 1450 +++++++++++++++++ .../nf-core/mosdepth/tests/nextflow.config | 5 + nextflow.config | 1 + nextflow_schema.json | 6 + subworkflows/local/gens_pon/main.nf | 99 +- tests/csv/1.0.0/bam.csv | 4 +- tests/csv/1.0.0/bam_sorted.csv | 4 +- tests/gens_pon.nf.test | 35 + tests/gens_pon.nf.test.snap | 67 +- 
tests/germlinecnvcaller_cohort.nf.test.snap | 8 +- tests/mutect2.nf.test.snap | 20 +- workflows/createpanelrefs.nf | 2 + 28 files changed, 2937 insertions(+), 47 deletions(-) create mode 100644 modules/nf-core/cat/cat/environment.yml create mode 100644 modules/nf-core/cat/cat/main.nf create mode 100644 modules/nf-core/cat/cat/meta.yml create mode 100644 modules/nf-core/cat/cat/tests/main.nf.test create mode 100644 modules/nf-core/cat/cat/tests/main.nf.test.snap create mode 100644 modules/nf-core/cat/cat/tests/nextflow.config create mode 100644 modules/nf-core/mosdepth/environment.yml create mode 100644 modules/nf-core/mosdepth/main.nf create mode 100644 modules/nf-core/mosdepth/meta.yml create mode 100644 modules/nf-core/mosdepth/tests/main.nf.test create mode 100644 modules/nf-core/mosdepth/tests/main.nf.test.snap create mode 100644 modules/nf-core/mosdepth/tests/nextflow.config diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e8b164..1a2ffd6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#54](https://github.com/nf-core/createpanelrefs/pull/54) - Update nft-utils to 0.0.4 - [#55](https://github.com/nf-core/createpanelrefs/pull/55) - Prepare relase 1.0.0 - [#63](https://github.com/nf-core/createpanelrefs/pull/63) - Template update for nf-core/tools v3.5.0dev +- [#66](https://github.com/nf-core/createpanelrefs/pull/66) - Update `GENS` to allow for creating a long-read PON ### `Fixed` @@ -56,6 +57,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n | gatk4 | | 4.6.2.0 | | gawk | | 5.3.0 | | htslib | | 1.22.1 | +| mosdepth | | 0.3.11 | | multiqc | | 1.32 | | samtools | | 1.22.1 | diff --git a/conf/modules/gens_pon.config b/conf/modules/gens_pon.config index e0d99c8..d7e7202 100644 --- a/conf/modules/gens_pon.config +++ b/conf/modules/gens_pon.config @@ -17,22 +17,70 @@ process { ] } + withName: '.*GENS_PON:CAT_CAT' { + ext.prefix = { 
"${meta.id}_concat" } + } + withName: '.*GENS_PON:GATK4_COLLECTREADCOUNTS' { - ext.args = {"--format ${params.gens_readcount_format} --imr OVERLAPPING_ONLY"} + ext.args = { "--format ${params.gens_readcount_format} --imr OVERLAPPING_ONLY" } publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/gens_pon/readcounts" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode, + path: { "${params.outdir}/gens_pon/readcounts" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: '.*GENS_PON:INTERVAL_LIST_TO_BED' { + ext.prefix = { "gens_coverage_bins" } + ext.suffix = "bed" + ext.args2 = '\'BEGIN { FS=OFS="\t" } $2 < $3 { print $1, $2, $3 }\'' + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/references/intervals/gens_pon/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } withName: '.*GENS_PON:GATK4_CREATEREADCOUNTPANELOFNORMALS' { - ext.args = { ["--minimum-interval-median-percentile ${params.gens_min_interval_median_percentile}", - "--maximum-chunk-size ${params.gens_maximum_chunk_size}"].join(" ")} + ext.args = { + [ + "--minimum-interval-median-percentile ${params.gens_min_interval_median_percentile}", + "--maximum-chunk-size ${params.gens_maximum_chunk_size}", + ].join(" ") + } publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/gens_pon/createreadcountpanelofnormals" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode, + path: { "${params.outdir}/gens_pon/createreadcountpanelofnormals" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, ] } + + withName: '.*GENS_PON:MOSDEPTH' { + ext.args = { + [ + '--no-per-base', + '--fast-mode', + ].join(' ') + } + } + + withName: '.*GENS_PON:MOSDEPTH_GATK_FORMAT' { + ext.prefix = { "${meta.id}_gatk_formatted" } + ext.suffix = "gatk_format.tsv" + ext.args = { "-v sample=${meta.id}" } + ext.args2 = '\'BEGIN { OFS="\\t" } { $4 = int($4 + 0.5); $2++; print $1, $2, $3, $4 }\'' + } + + withName: '.*GENS_PON:MOSDEPTH_GATK_HEADER' { + // This requires meta.id to be the same as the sample name in the BAM/CRAM header + ext.args = { "-v sample=${meta.id}" } + ext.args2 = '\'{ print } END { print "@RG\\tID:GATKCopyNumber\\tSM:" sample; print "CONTIG\\tSTART\\tEND\\tCOUNT" }\'' + ext.prefix = { "${meta.id}" } + ext.suffix = 'mosdepth_gatk_header.tsv' + } + + withName: '.*GENS_PON:SAMTOOLS_VIEW' { + ext.args = '-H --output-fmt sam' + ext.prefix = { "${meta.id}.mosdepth_gatk_header" } + } } diff --git a/docs/output.md b/docs/output.md index 3480956..e2639a1 100644 --- a/docs/output.md +++ b/docs/output.md @@ -88,11 +88,14 @@ The reference file contains coverage information normalized across the cohort an - `{pon_name}.hdf5`: Final panel of normals file in HDF5 format. - `references/intervals/gens_pon/` - `*.interval_list`: Interval list file used for read count collection. + - `*.bed`: BED versions of interval list file used for read count collection for long-reads.
    [GENS](https://github.com/Clinical-Genomics-Lund/gens) creates a panel of normals for read-count denoising to improve somatic variant detection. The workflow: (1) indexes BAM/CRAM files if needed, (2) collects read counts at specified intervals using GATK's CollectReadCounts, and (3) creates a panel of normals using GATK's CreateReadCountPanelOfNormals. This panel can be used with GENS for somatic variant calling to reduce technical noise and improve variant detection sensitivity. +When `gens_analysis_type` is set to 'lrs', a modified version of the workflow above is run where coverage calculated by mosdepth is used instead of read counts. + ### MultiQC
    diff --git a/docs/usage.md b/docs/usage.md index ebe09a8..c9d6da6 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -131,6 +131,7 @@ If you are running the pipeline to generate references for the GENS workflow, yo | ------------- | --------------------- | | fasta/genomes | fai | | | dict | +| | gens_analysis_type | | | gens_bin_length | | | gens_pon_name | | | gens_readcount_format | @@ -138,6 +139,9 @@ If you are running the pipeline to generate references for the GENS workflow, yo The GENS workflow creates a panel of normals for read-count denoising from normal samples. This panel can be used with GENS for somatic variant calling to reduce technical noise and improve variant detection. For more information, see the [GENS documentation](https://github.com/Clinical-Genomics-Lund/gens). +> [!NOTE] +> If `--gens_analysis_type` is set to 'lrs', this requires the sample ID set in the sample sheet to be equal to the `SM` tag in the BAM-file. + ### germlinecnvcaller If you are running the pipeline to generate references for the GATK's germlinecnvcalling workflow, you should ensure that you have provided all the mandatory options specified in the table below. 
diff --git a/main.nf b/main.nf index 0ba1739..2b41c60 100644 --- a/main.nf +++ b/main.nf @@ -137,6 +137,7 @@ workflow { PIPELINE_INITIALISATION.out.samplesheet, params.tools ?: "no_tools", params.gcnv_model_name, + params.gens_analysis_type, params.gens_pon_name, params.mutect2_pon_name, fasta, @@ -232,6 +233,7 @@ workflow NFCORE_CREATEPANELREFS { samplesheet // channel: samplesheet read in from --input tools // string: comma separated list of tools to run gcnv_model_name // string: name of gcnv model + gens_analysis_type // string: type of analysis for gens pon ('lrs' or 'srs') gens_pon_name // string: name of gens pon mutect2_pon_name // string: name of mutect2 pon fasta // channel: [meta, fasta] @@ -250,7 +252,7 @@ workflow NFCORE_CREATEPANELREFS { main: // WORKFLOW: Run pipeline - CREATEPANELREFS(samplesheet, tools, gcnv_model_name, gens_pon_name, mutect2_pon_name, fasta, dict, fai, cnvkit_targets, gcnv_exclude_bed, gcnv_exclude_interval_list, gcnv_mappable_regions, gcnv_ploidy_priors, gcnv_segmental_duplications, gcnv_target_bed, gcnv_target_interval_list, gens_interval_list, mutect2_target_bed) + CREATEPANELREFS(samplesheet, tools, gcnv_model_name, gens_analysis_type, gens_pon_name, mutect2_pon_name, fasta, dict, fai, cnvkit_targets, gcnv_exclude_bed, gcnv_exclude_interval_list, gcnv_mappable_regions, gcnv_ploidy_priors, gcnv_segmental_duplications, gcnv_target_bed, gcnv_target_interval_list, gens_interval_list, mutect2_target_bed) emit: versions = CREATEPANELREFS.out.versions // channel: versions.yml diff --git a/modules.json b/modules.json index ef16fa5..96ef637 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "cat/cat": { + "branch": "master", + "git_sha": "69614d4579a6bd9b8a2ecffb35959809d9c36559", + "installed_by": ["modules"] + }, "cnvkit/batch": { "branch": "master", "git_sha": "09223d6de1dab602242c4c57ab2a4599d460e528", @@ -85,6 +90,11 @@ "git_sha": 
"41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", "installed_by": ["modules"] }, + "mosdepth": { + "branch": "master", + "git_sha": "6832b69ef7f98c54876d6436360b6b945370c615", + "installed_by": ["modules"] + }, "multiqc": { "branch": "master", "git_sha": "af27af1be706e6a2bb8fe454175b0cdf77f47b49", diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml new file mode 100644 index 0000000..9851176 --- /dev/null +++ b/modules/nf-core/cat/cat/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf new file mode 100644 index 0000000..aa72fc4 --- /dev/null +++ b/modules/nf-core/cat/cat/main.nf @@ -0,0 +1,78 @@ +process CAT_CAT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pigz:2.8' : + 'biocontainers/pigz:2.8' }" + + input: + tuple val(meta), path(files_in) + + output: + tuple val(meta), path("${prefix}"), emit: file_out + tuple val("${task.process}"), val("pigz"), eval("pigz --version 2>&1 | sed 's/pigz //g'"), topic: versions, emit: versions_cat + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + + // choose appropriate concatenation tool depending on input and output format + + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } + """ + $command1 \\ + $args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." 
+ } + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} + +// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) +} diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml new file mode 100644 index 0000000..36a7359 --- /dev/null +++ b/modules/nf-core/cat/cat/meta.yml @@ -0,0 +1,63 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + documentation: https://man7.org/linux/man-pages/man1/cat.1.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" + ontologies: [] +output: + file_out: + - - meta: + type: map + description: Groovy Map containing sample information + - ${prefix}: + type: file + description: Concatenated file. 
Will be gzipped if file_out ends with + ".gz" + pattern: "${file_out}" + ontologies: [] + versions_cat: + - - ${task.process}: + type: string + description: The name of the process + - pigz: + type: string + description: The name of the tool + - "pigz --version 2>&1 | sed 's/pigz //g'": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - pigz: + type: string + description: The name of the tool + - "pigz --version 2>&1 | sed 's/pigz //g'": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 0000000..030c664 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,192 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("sarscov2 - genome - error: name conflict") { + when { + process { + """ + input[0] = + [ + [ id:'genome', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ fasta, sizes ] - unzipped") { + when { + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: 
true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("sarscov2 - [ gff3_gz, maf_gz ] - zipped") { + when { + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() + } + ) + } + } + + test("sarscov2 - [ gff3_gz, maf_gz ] - unzipped") { + config './nextflow.config' + + when { + params { + cat_prefix = "cat.txt" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ fasta, sizes ] - zipped") { + config './nextflow.config' + + when { + params { + cat_prefix = "cat.txt.gz" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - fasta - zipped") { + config './nextflow.config' + + when { + params { + cat_prefix = "cat.txt.gz" + } + 
process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - fasta - unzipped - stub") { + options "-stub" + + when { + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 0000000..5b4e4cc --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,283 @@ +{ + "sarscov2 - [ gff3_gz, maf_gz ] - unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T09:08:31.479828" + }, + "sarscov2 - fasta - unzipped - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": 
"2025-12-10T16:16:28.118094" + }, + "sarscov2 - [ fasta, sizes ] - zipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T16:15:56.529595" + }, + "sarscov2 - genome - error: name conflict": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "file_out": [ + + ], + "versions_cat": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T16:14:54.496538" + }, + "sarscov2 - [ fasta, sizes ] - unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T11:26:29.942203" + }, + "sarscov2 - [ gff3_gz, maf_gz ] - zipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T11:26:45.679401" + }, + "sarscov2 - fasta - zipped": { + "content": 
[ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,6e9fe4042a72f2345f644f239272b7e6" + ] + ], + "1": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt.gz:md5,6e9fe4042a72f2345f644f239272b7e6" + ] + ], + "versions_cat": [ + [ + "CAT_CAT", + "pigz", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T16:16:12.439911" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/cat/tests/nextflow.config b/modules/nf-core/cat/cat/tests/nextflow.config new file mode 100644 index 0000000..5bc9bf5 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = "${params.cat_prefix}" + } +} diff --git a/modules/nf-core/mosdepth/environment.yml b/modules/nf-core/mosdepth/environment.yml new file mode 100644 index 0000000..1c7f3ee --- /dev/null +++ b/modules/nf-core/mosdepth/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/mosdepth + - htslib=1.22.1 + - mosdepth=0.3.11=h0ec343a_1 diff --git a/modules/nf-core/mosdepth/main.nf b/modules/nf-core/mosdepth/main.nf new file mode 100644 index 0000000..63739bf --- /dev/null +++ b/modules/nf-core/mosdepth/main.nf @@ -0,0 +1,77 @@ +process MOSDEPTH { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/00/00d32b53160c26794959da7303ee6e2107afd4d292060c9f287b0af1fddbd847/data' : + 'community.wave.seqera.io/library/mosdepth_htslib:0f58993cb6d93294'}" + + input: + tuple val(meta), path(bam), path(bai), path(bed) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path('*.global.dist.txt') , emit: global_txt + tuple val(meta), path('*.summary.txt') , emit: summary_txt + tuple val(meta), path('*.region.dist.txt') , optional:true, emit: regions_txt + tuple val(meta), path('*.per-base.d4') , optional:true, emit: per_base_d4 + tuple val(meta), path('*.per-base.bed.gz') , optional:true, emit: per_base_bed + tuple val(meta), path('*.per-base.bed.gz.csi') , optional:true, emit: per_base_csi + tuple val(meta), path('*.regions.bed.gz') , optional:true, emit: regions_bed + tuple val(meta), path('*.regions.bed.gz.csi') , optional:true, emit: regions_csi + tuple val(meta), path('*.quantized.bed.gz') , optional:true, emit: quantized_bed + tuple val(meta), path('*.quantized.bed.gz.csi') , optional:true, emit: quantized_csi + tuple val(meta), path('*.thresholds.bed.gz') , optional:true, emit: thresholds_bed + tuple val(meta), path('*.thresholds.bed.gz.csi'), optional:true, emit: thresholds_csi + tuple val("${task.process}"), val('mosdepth'), eval("mosdepth --version | sed 's/mosdepth //g'"), topic: versions, emit: versions_mosdepth + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--fasta ${fasta}" : "" + def interval = bed ? "--by ${bed}" : "" + if (bed && (args.contains("--by") || args.contains("-b "))) { + error "'--by' can only be specified once when running mosdepth! 
Either remove input BED file definition or remove '--by' from 'ext.args' definition" + } + if (args.contains("--thresholds") && !(bed || args.contains("--by") || args.contains("-b "))) { + error "'--thresholds' can only be specified in conjunction with '--by' or an input bed file" + } + + """ + mosdepth \\ + --threads $task.cpus \\ + $interval \\ + $reference \\ + $args \\ + $prefix \\ + $bam + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (bed && (args.contains("--by") || args.contains("-b "))) { + error "'--by' can only be specified once when running mosdepth! Either remove input BED file definition or remove '--by' from 'ext.args' definition" + } + if (args.contains("--thresholds") && !(bed || args.contains("--by") || args.contains("-b "))) { + error "'--thresholds' can only be specified in conjunction with '--by' or an input bed file" + } + """ + touch ${prefix}.global.dist.txt + touch ${prefix}.region.dist.txt + touch ${prefix}.summary.txt + touch ${prefix}.per-base.d4 + echo "" | gzip > ${prefix}.per-base.bed.gz + touch ${prefix}.per-base.bed.gz.csi + echo "" | gzip > ${prefix}.regions.bed.gz + touch ${prefix}.regions.bed.gz.csi + echo "" | gzip > ${prefix}.quantized.bed.gz + touch ${prefix}.quantized.bed.gz.csi + echo "" | gzip > ${prefix}.thresholds.bed.gz + touch ${prefix}.thresholds.bed.gz.csi + """ +} diff --git a/modules/nf-core/mosdepth/meta.yml b/modules/nf-core/mosdepth/meta.yml new file mode 100644 index 0000000..04c8bfe --- /dev/null +++ b/modules/nf-core/mosdepth/meta.yml @@ -0,0 +1,211 @@ +name: mosdepth +description: Calculates genome-wide sequencing coverage. +keywords: + - mosdepth + - bam + - cram + - coverage +tools: + - mosdepth: + description: | + Fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. 
+ documentation: https://github.com/brentp/mosdepth + doi: 10.1093/bioinformatics/btx699 + licence: ["MIT"] + identifier: biotools:mosdepth +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Input BAM/CRAM file + pattern: "*.{bam,cram}" + ontologies: [] + - bai: + type: file + description: Index for BAM/CRAM file + pattern: "*.{bai,crai}" + ontologies: [] + - bed: + type: file + description: BED file with intersected intervals + pattern: "*.{bed}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing bed information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta}" + ontologies: [] +output: + global_txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.global.dist.txt": + type: file + description: Text file with global cumulative coverage distribution + pattern: "*.{global.dist.txt}" + ontologies: [] + summary_txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.summary.txt": + type: file + description: Text file with summary mean depths per chromosome and regions + pattern: "*.{summary.txt}" + ontologies: [] + regions_txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.region.dist.txt": + type: file + description: Text file with region cumulative coverage distribution + pattern: "*.{region.dist.txt}" + ontologies: [] + per_base_d4: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.per-base.d4": + type: file + description: D4 file with per-base coverage + pattern: "*.{per-base.d4}" + ontologies: [] + per_base_bed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.per-base.bed.gz": + type: file + description: BED file with per-base coverage + pattern: "*.{per-base.bed.gz}" + ontologies: [] + per_base_csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.per-base.bed.gz.csi": + type: file + description: Index file for BED file with per-base coverage + pattern: "*.{per-base.bed.gz.csi}" + ontologies: [] + regions_bed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.regions.bed.gz": + type: file + description: BED file with per-region coverage + pattern: "*.{regions.bed.gz}" + ontologies: [] + regions_csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.regions.bed.gz.csi": + type: file + description: Index file for BED file with per-region coverage + pattern: "*.{regions.bed.gz.csi}" + ontologies: [] + quantized_bed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.quantized.bed.gz": + type: file + description: BED file with binned coverage + pattern: "*.{quantized.bed.gz}" + ontologies: [] + quantized_csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.quantized.bed.gz.csi": + type: file + description: Index file for BED file with binned coverage + pattern: "*.{quantized.bed.gz.csi}" + ontologies: [] + thresholds_bed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.thresholds.bed.gz": + type: file + description: BED file with the number of bases in each region that are covered + at or above each threshold + pattern: "*.{thresholds.bed.gz}" + ontologies: [] + thresholds_csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.thresholds.bed.gz.csi": + type: file + description: Index file for BED file with threshold coverage + pattern: "*.{thresholds.bed.gz.csi}" + ontologies: [] + versions_mosdepth: + - - ${task.process}: + type: string + description: The process the versions were collected from + - mosdepth: + type: string + description: The tool name + - "mosdepth --version | sed 's/mosdepth //g'": + type: string + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - mosdepth: + type: string + description: The tool name + - "mosdepth --version | sed 's/mosdepth //g'": + type: string + description: The command used to generate the version of the tool +authors: + - "@joseespinosa" + - "@drpatelh" + - "@ramprasadn" + - "@matthdsm" +maintainers: + - "@joseespinosa" + - "@ramprasadn" + - "@matthdsm" diff --git a/modules/nf-core/mosdepth/tests/main.nf.test b/modules/nf-core/mosdepth/tests/main.nf.test new file mode 100644 index 0000000..b05dde5 --- /dev/null +++ b/modules/nf-core/mosdepth/tests/main.nf.test @@ -0,0 +1,268 @@ +nextflow_process { + + name "Test Process MOSDEPTH" + script "../main.nf" + process "MOSDEPTH" + + tag "modules" + tag "modules_nfcore" + tag "mosdepth" + config "./nextflow.config" + + test("homo_sapiens - bam, bai, []") { + + when { + params { + module_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: 
true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - bam, bai, bed") { + + when { + params { + module_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - cram, crai, []") { + + when { + params { + module_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - cram, crai, bed") { + + when { + params { + module_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - bam, bai, [] - window") { + + when { + params { + module_args = "--by 100" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - bam, bai, [] - quantized") { + + when { + params { + module_args = "--quantize 0:1:4:100:200" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - bam, bai, bed - thresholds") { + + when { + params { + module_args = "--thresholds 1,10,20,30" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + + test("homo_sapiens - bam, bai, bed - fail") { + + when { + params { + module_args = "--by 100" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.failed + } + + } + + test("homo_sapiens - bam, bai, [] - stub") { + + options "-stub" + when { + params { + module_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + + } + +} diff --git a/modules/nf-core/mosdepth/tests/main.nf.test.snap b/modules/nf-core/mosdepth/tests/main.nf.test.snap new file mode 100644 index 0000000..c27fcc7 --- /dev/null +++ b/modules/nf-core/mosdepth/tests/main.nf.test.snap @@ -0,0 +1,1450 @@ +{ + "homo_sapiens - bam, bai, [] - stub": { + 
"content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.global.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.region.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.d4:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.global.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + 
}, + "test.per-base.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "per_base_d4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.d4:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "quantized_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "quantized_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.region.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "thresholds_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "thresholds_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:06:13.219131" + }, + "homo_sapiens - cram, crai, bed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + [ + { + 
"id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + 
"timestamp": "2025-09-23T13:22:14.011309" + }, + "homo_sapiens - bam, bai, [] - quantized": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,f037c215449d361112efc10108fcc17c" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,c0a3176a59010639455a4aefb3f247ef" + ] + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,f037c215449d361112efc10108fcc17c" + ] + ], + "quantized_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,c0a3176a59010639455a4aefb3f247ef" + ] + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:22:22.818082" + }, + "homo_sapiens - bam, bai, bed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": 
[ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:22:04.449943" + }, + "homo_sapiens - bam, bai, [] - window": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,0b6ea9f0da1228252d9aef2d3b6f7f76" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,34f48d16fcdd61e44d812e29e02c77b8" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,17a2cbe22a948d7c004b90a1f28347a1" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": 
"test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,34f48d16fcdd61e44d812e29e02c77b8" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,17a2cbe22a948d7c004b90a1f28347a1" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,0b6ea9f0da1228252d9aef2d3b6f7f76" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:22:18.435089" + }, + "homo_sapiens - bam, bai, []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:21:59.785829" + }, + "homo_sapiens - cram, crai, []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:22:09.294766" + }, + "homo_sapiens - bam, bai, bed - thresholds": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,fe70ae728cd10726c42a2bcd44adfc9d" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,2c52ab89e7496af475de3cb2ca04c7b3" + ] + ], + "12": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6adccf94ed775c9f53422e3e9c7af27f" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,c33ac5c86370039463796f01434fc0e4" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,fe70ae728cd10726c42a2bcd44adfc9d" + ] + ], + "thresholds_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,2c52ab89e7496af475de3cb2ca04c7b3" + ] + ], + "versions_mosdepth": [ + [ + "MOSDEPTH", + "mosdepth", + "0.3.11" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T13:22:27.300204" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/mosdepth/tests/nextflow.config b/modules/nf-core/mosdepth/tests/nextflow.config new file mode 100644 index 0000000..b21c05b --- /dev/null +++ b/modules/nf-core/mosdepth/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: "MOSDEPTH" { + ext.args = params.module_args + } +} diff --git a/nextflow.config b/nextflow.config index 2fcd3c2..fbbc363 100644 --- a/nextflow.config +++ b/nextflow.config @@ -37,6 +37,7 @@ params { gens_min_interval_median_percentile = 5.0 gens_pon_name = 'gens' gens_readcount_format = 'HDF5' + gens_analysis_type = 'srs' // CNVkit options cnvkit_targets = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 0b43a3f..458cfdf 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -217,6 +217,12 @@ "description": "Options used by the gens subworkflow", "default": "", "properties": { + "gens_analysis_type": { + "type": "string", + "description": "GENS panel of normals analysis type ('srs' or 'lrs').", + "default": "srs", + "enum": ["srs", "lrs"] + }, "gens_bin_length": { "type": "number", "default": 100, diff --git a/subworkflows/local/gens_pon/main.nf b/subworkflows/local/gens_pon/main.nf index 99fb8e7..db3c2f1 100644 --- a/subworkflows/local/gens_pon/main.nf +++ b/subworkflows/local/gens_pon/main.nf @@ -1,10 +1,17 @@ include { GATK4_COLLECTREADCOUNTS } from '../../../modules/nf-core/gatk4/collectreadcounts' include { GATK4_CREATEREADCOUNTPANELOFNORMALS } from '../../../modules/nf-core/gatk4/createreadcountpanelofnormals' include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index' +include { SAMTOOLS_VIEW } from '../../../modules/nf-core/samtools/view' +include { MOSDEPTH } from '../../../modules/nf-core/mosdepth' +include { GAWK as MOSDEPTH_GATK_HEADER } from '../../../modules/nf-core/gawk' +include { GAWK as MOSDEPTH_GATK_FORMAT } from '../../../modules/nf-core/gawk' +include { GAWK as INTERVAL_LIST_TO_BED } from '../../../modules/nf-core/gawk' +include { CAT_CAT } from 
'../../../modules/nf-core/cat/cat' workflow GENS_PON { take: ch_input // channel: [mandatory] [ val(meta), path(bam/cram), path(bai/crai) ] + val_analysis_type // string: [mandatory] type of analysis ('lrs' or 'srs') val_pon_name // string: [optional] name for panel of normals ch_dict // channel: [optional] [ val(meta), path(dict) ] ch_fai // channel: [optional] [ val(meta), path(fai) ] @@ -13,6 +20,7 @@ workflow GENS_PON { main: versions = channel.empty() + ch_readcounts_out = channel.empty() // Filter out files that lack indices, and generate them ch_input @@ -25,6 +33,7 @@ workflow GENS_PON { .set { ch_for_mix } SAMTOOLS_INDEX(ch_for_mix.alignment_without_index) + versions = versions.mix(SAMTOOLS_INDEX.out.versions) SAMTOOLS_INDEX.out.bai .mix(SAMTOOLS_INDEX.out.crai) @@ -34,25 +43,89 @@ workflow GENS_PON { ch_for_mix.alignment_without_index .join(ch_index) .mix(ch_for_mix.alignment_with_index) - .combine(ch_interval_list.map { it -> it[1] }) - .set { ch_readcounts_in } + .set { ch_bam_bai } + + if (val_analysis_type == 'srs') { + ch_bam_bai + .combine(ch_interval_list.map { _meta, interval_list -> interval_list }) + .set { ch_readcounts_in } + + // Collect read counts, and generate models + GATK4_COLLECTREADCOUNTS(ch_readcounts_in, ch_fasta, ch_fai, ch_dict) + versions = versions.mix(GATK4_COLLECTREADCOUNTS.out.versions) + + GATK4_COLLECTREADCOUNTS.out.tsv + .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) + .set { ch_readcounts } - // Collect read counts, and generate models - GATK4_COLLECTREADCOUNTS(ch_readcounts_in, ch_fasta, ch_fai, ch_dict) + } else if (val_analysis_type == 'lrs') { - GATK4_COLLECTREADCOUNTS.out.tsv - .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) - .collect { it[1] } - .map { it -> - return [[id: val_pon_name], it] + INTERVAL_LIST_TO_BED( + ch_interval_list, [], [] + ) + versions = versions.mix(INTERVAL_LIST_TO_BED.out.versions) + + ch_bam_bai + .combine(INTERVAL_LIST_TO_BED.out.output) + .map { meta, bam, bai, _bins_meta, bins -> + [meta, bam, bai, bins] 
} - .set { ch_readcounts_out } + .set { ch_mosdepth_in } + + // Prepare the body + MOSDEPTH( + ch_mosdepth_in, + [[],[]] + ) + + // Prepare the header + SAMTOOLS_VIEW( + ch_bam_bai, + [[],[]], + [], + false + ) + versions = versions.mix(SAMTOOLS_VIEW.out.versions) + + MOSDEPTH_GATK_HEADER( + SAMTOOLS_VIEW.out.sam, + [], + false + ) + versions = versions.mix(MOSDEPTH_GATK_HEADER.out.versions) - GATK4_CREATEREADCOUNTPANELOFNORMALS(ch_readcounts_out) - versions = versions.mix(GATK4_COLLECTREADCOUNTS.out.versions) + MOSDEPTH_GATK_FORMAT( + MOSDEPTH.out.regions_bed, + [], + false + ) + versions = versions.mix(MOSDEPTH_GATK_FORMAT.out.versions) + + // Prepare GATK inputs + MOSDEPTH_GATK_HEADER.out.output + .join(MOSDEPTH_GATK_FORMAT.out.output) + .map { meta, header, body -> [meta, [header, body]] } + .set { ch_cat_in } + + CAT_CAT(ch_cat_in) + + CAT_CAT.out.file_out + .map { meta, gatk_input -> + return [meta, gatk_input] + } + .set { ch_readcounts } + + } + + ch_readcounts + .collect { _meta, readcounts -> readcounts } + .map { readcounts -> [[id: val_pon_name], readcounts] } + .set { ch_create_pon_in } + + GATK4_CREATEREADCOUNTPANELOFNORMALS(ch_create_pon_in) + versions = versions.mix(GATK4_CREATEREADCOUNTPANELOFNORMALS.out.versions) - versions = versions.mix(SAMTOOLS_INDEX.out.versions) emit: genspon = GATK4_CREATEREADCOUNTPANELOFNORMALS.out.pon diff --git a/tests/csv/1.0.0/bam.csv b/tests/csv/1.0.0/bam.csv index b95f604..47a938f 100644 --- a/tests/csv/1.0.0/bam.csv +++ b/tests/csv/1.0.0/bam.csv @@ -1,3 +1,3 @@ sample,bam,bai -sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai 
-sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai +normal,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai +tumour,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai diff --git a/tests/csv/1.0.0/bam_sorted.csv b/tests/csv/1.0.0/bam_sorted.csv index 46428ed..f3fd62d 100644 --- a/tests/csv/1.0.0/bam_sorted.csv +++ b/tests/csv/1.0.0/bam_sorted.csv @@ -1,3 +1,3 @@ sample,bam,bai -sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai -sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai +testN,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai 
+testT,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai diff --git a/tests/gens_pon.nf.test b/tests/gens_pon.nf.test index 2411c02..fed4001 100644 --- a/tests/gens_pon.nf.test +++ b/tests/gens_pon.nf.test @@ -21,6 +21,41 @@ nextflow_pipeline { } } + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assert workflow.success + assertAll( + { assert snapshot( + // pipeline versions.yml file for multiqc from which Nextflow and pipeline versions are removed (all from the workflow key) + removeFromYamlMap("${outputDir}/pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", "Workflow"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } + + test("-profile test --tools gens --input tests/csv/1.0.0/bam_sorted.csv --gens_bin_length 100 --gens_pon_name gens_pon --gens_readcount_format TSV --gens_analysis_type lrs") { + + when { + params { + genome = 'GRCh38.chr22.testdata' + gens_analysis_type = 'lrs' + gens_bin_length = 100 + gens_interval_list = null + gens_pon_name = 'gens_pon' + gens_readcount_format = "TSV" + input = "${projectDir}/tests/csv/1.0.0/bam_sorted.csv" + outdir = "$outputDir" + tools = 'gens' + } + } + then { // stable_name: All files + folders in ${params.outdir}/ with a stable name def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) diff --git 
a/tests/gens_pon.nf.test.snap b/tests/gens_pon.nf.test.snap index 357d60d..1e76976 100644 --- a/tests/gens_pon.nf.test.snap +++ b/tests/gens_pon.nf.test.snap @@ -1,4 +1,59 @@ { + "-profile test --tools gens --input tests/csv/1.0.0/bam_sorted.csv --gens_bin_length 100 --gens_pon_name gens_pon --gens_readcount_format TSV --gens_analysis_type lrs": { + "content": [ + { + "GATK4_CREATEREADCOUNTPANELOFNORMALS": { + "gatk4": "4.6.2.0" + }, + "GATK4_PREPROCESSINTERVALS_GENS": { + "gatk4": "4.6.2.0" + }, + "INTERVAL_LIST_TO_BED": { + "gawk": "5.3.0" + }, + "MOSDEPTH_GATK_FORMAT": { + "gawk": "5.3.0" + }, + "MOSDEPTH_GATK_HEADER": { + "gawk": "5.3.0" + }, + "SAMTOOLS_VIEW": { + "samtools": "1.22.1" + } + }, + [ + "gens_pon", + "gens_pon/createreadcountpanelofnormals", + "gens_pon/createreadcountpanelofnormals/gens_pon.hdf5", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml", + "references", + "references/intervals", + "references/intervals/gens_pon", + "references/intervals/gens_pon/genome.interval_list", + "references/intervals/gens_pon/gens_coverage_bins.bed" + ], + [ + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "gens_coverage_bins.bed:md5,b5c7f328aaf419595302baaa16f5b649" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.12.0" + }, + "timestamp": "2026-02-06T14:48:19.423593928" + }, "-profile test --tools gens --input tests/csv/1.0.0/bam_sorted.csv --gens_bin_length 100 --gens_pon_name gens_pon --gens_readcount_format TSV": { "content": [ { @@ -17,8 +72,8 @@ "gens_pon/createreadcountpanelofnormals", 
"gens_pon/createreadcountpanelofnormals/gens_pon.hdf5", "gens_pon/readcounts", - "gens_pon/readcounts/sample1.tsv", - "gens_pon/readcounts/sample2.tsv", + "gens_pon/readcounts/testN.tsv", + "gens_pon/readcounts/testT.tsv", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/llms-full.txt", @@ -37,15 +92,15 @@ "references/intervals/gens_pon/genome.interval_list" ], [ - "sample1.tsv:md5,bfceab35109b04317f38f9cc1cde71ca", - "sample2.tsv:md5,7141d08cdc26f6057557be9e23ef4365", + "testN.tsv:md5,bfceab35109b04317f38f9cc1cde71ca", + "testT.tsv:md5,7141d08cdc26f6057557be9e23ef4365", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.0" + "nextflow": "25.12.0" }, - "timestamp": "2025-10-31T14:33:30.730355032" + "timestamp": "2026-02-06T14:49:45.524921421" } } \ No newline at end of file diff --git a/tests/germlinecnvcaller_cohort.nf.test.snap b/tests/germlinecnvcaller_cohort.nf.test.snap index 35037b8..90d9d7b 100644 --- a/tests/germlinecnvcaller_cohort.nf.test.snap +++ b/tests/germlinecnvcaller_cohort.nf.test.snap @@ -130,8 +130,8 @@ "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/std_log_mean_bias_t.tsv", "germlinecnvcaller/germlinecnvcaller/2scattered-cnv-model/2scattered-model/std_psi_t_log__.tsv", "germlinecnvcaller/readcounts", - "germlinecnvcaller/readcounts/sample1.hdf5", - "germlinecnvcaller/readcounts/sample2.hdf5", + "germlinecnvcaller/readcounts/testN.hdf5", + "germlinecnvcaller/readcounts/testT.hdf5", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/llms-full.txt", @@ -173,8 +173,8 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.0" + "nextflow": "25.12.0" }, - "timestamp": "2025-10-31T15:58:51.382285872" + "timestamp": "2026-02-06T15:12:44.760563402" } } \ No newline at end of file diff --git a/tests/mutect2.nf.test.snap b/tests/mutect2.nf.test.snap index e1c5de8..eba04d6 100644 --- a/tests/mutect2.nf.test.snap +++ b/tests/mutect2.nf.test.snap @@ 
-75,12 +75,9 @@ }, [ "gatk4", - "gatk4/sample1.vcf.gz", - "gatk4/sample1.vcf.gz.stats", - "gatk4/sample1.vcf.gz.tbi", - "gatk4/sample2.vcf.gz", - "gatk4/sample2.vcf.gz.stats", - "gatk4/sample2.vcf.gz.tbi", + "gatk4/normal.vcf.gz", + "gatk4/normal.vcf.gz.stats", + "gatk4/normal.vcf.gz.tbi", "gatk4/test", "gatk4/test.vcf.gz", "gatk4/test.vcf.gz.tbi", @@ -91,6 +88,9 @@ "gatk4/test/chr21$25689498$46709983", "gatk4/test/vcfheader.vcf", "gatk4/test/vidmap.json", + "gatk4/tumour.vcf.gz", + "gatk4/tumour.vcf.gz.stats", + "gatk4/tumour.vcf.gz.tbi", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/llms-full.txt", @@ -105,15 +105,15 @@ "pipeline_info/nf_core_createpanelrefs_software_mqc_versions.yml" ], [ - "sample1.vcf.gz.stats:md5,b569ce66bbffe9588b3d221e821023ee", - "sample2.vcf.gz.stats:md5,76f749c53212d72e98801f6030fbf8a6", + "normal.vcf.gz.stats:md5,b569ce66bbffe9588b3d221e821023ee", + "tumour.vcf.gz.stats:md5,76f749c53212d72e98801f6030fbf8a6", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f" ] ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.0" + "nextflow": "25.12.0" }, - "timestamp": "2025-10-31T15:08:35.629022911" + "timestamp": "2026-02-06T11:30:00.137671531" } } \ No newline at end of file diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 99a91da..e7b3cd9 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -15,6 +15,7 @@ workflow CREATEPANELREFS { samplesheet // channel: samplesheet read in from --input tools // array: tools to run, or no_tools if none (it's actually comma separated values string, but close enough) gcnv_model_name // string: name of gcnv model + gens_analysis_type // string: type of analysis for gens pon ('lrs' or 'srs') gens_pon_name // string: name of gens pon mutect2_pon_name // string: name of mutect2 pon fasta // channel: [meta, fasta] @@ -123,6 +124,7 @@ workflow CREATEPANELREFS { GENS_PON( gens_input, + gens_analysis_type, gens_pon_name, dict, fai,