From e3bf10fa5c9a8f2dd12eb06f6a68212aff7f4be1 Mon Sep 17 00:00:00 2001 From: Shen-YuFei Date: Tue, 28 Apr 2026 14:08:08 +0800 Subject: [PATCH 1/5] feat(pridepy): add pridepy download module --- modules/bigbio/pridepy/environment.yml | 6 +++ modules/bigbio/pridepy/main.nf | 43 +++++++++++++++++ modules/bigbio/pridepy/meta.yml | 46 +++++++++++++++++++ modules/bigbio/pridepy/tests/main.nf.test | 28 +++++++++++ .../bigbio/pridepy/tests/main.nf.test.snap | 14 ++++++ modules/bigbio/pridepy/tests/nextflow.config | 3 ++ 6 files changed, 140 insertions(+) create mode 100644 modules/bigbio/pridepy/environment.yml create mode 100644 modules/bigbio/pridepy/main.nf create mode 100644 modules/bigbio/pridepy/meta.yml create mode 100644 modules/bigbio/pridepy/tests/main.nf.test create mode 100644 modules/bigbio/pridepy/tests/main.nf.test.snap create mode 100644 modules/bigbio/pridepy/tests/nextflow.config diff --git a/modules/bigbio/pridepy/environment.yml b/modules/bigbio/pridepy/environment.yml new file mode 100644 index 0000000..6fded8f --- /dev/null +++ b/modules/bigbio/pridepy/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::pridepy=0.0.14 diff --git a/modules/bigbio/pridepy/main.nf b/modules/bigbio/pridepy/main.nf new file mode 100644 index 0000000..300a0d2 --- /dev/null +++ b/modules/bigbio/pridepy/main.nf @@ -0,0 +1,43 @@ +process PRIDEPY_DOWNLOAD { + tag "${meta.id}" + label 'process_low' + label 'process_long' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/pridepy:0.0.14--pyhdfd78af_0' + : 'biocontainers/pridepy:0.0.14--pyhdfd78af_0'}" + + input: + val(meta) + + output: + path "output/", emit: download_dir, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir -p output + pridepy ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pridepy: \$(pip show pridepy 2>/dev/null | grep Version | cut -d' ' -f2) + END_VERSIONS + """ + + stub: + """ + mkdir -p output + touch output/${meta.id}.placeholder + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pridepy: 0.0.14 + END_VERSIONS + """ +} diff --git a/modules/bigbio/pridepy/meta.yml b/modules/bigbio/pridepy/meta.yml new file mode 100644 index 0000000..63ada85 --- /dev/null +++ b/modules/bigbio/pridepy/meta.yml @@ -0,0 +1,46 @@ +name: pridepy +description: Python client for PRIDE Archive — download and query proteomics datasets +keywords: + - pride + - download + - proteomics + - raw files + - globus + - aspera +tools: + - pridepy: + description: | + pridepy is a Python client for the PRIDE Archive REST API. + It supports downloading public/private files via FTP, Aspera, Globus (HTTPS), or S3, + as well as querying project metadata and file listings. + The specific sub-command and arguments are passed via task.ext.args. + homepage: https://github.com/PRIDE-Archive/pridepy + documentation: https://github.com/PRIDE-Archive/pridepy + tool_dev_url: https://github.com/PRIDE-Archive/pridepy + doi: "" + licence: + - "Apache-2.0" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing task information. + e.g. `[ id: 'PXD001819' ]` +output: + download_dir: + - "output/": + type: directory + description: Directory containing downloaded files (for download sub-commands) + pattern: "output/" + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 +authors: + - "@ypriverol" +maintainers: + - "@ypriverol" diff --git a/modules/bigbio/pridepy/tests/main.nf.test b/modules/bigbio/pridepy/tests/main.nf.test new file mode 100644 index 0000000..682f42f --- /dev/null +++ b/modules/bigbio/pridepy/tests/main.nf.test @@ -0,0 +1,28 @@ +nextflow_process { + + name "Test Process PRIDEPY_DOWNLOAD" + script "../main.nf" + process "PRIDEPY_DOWNLOAD" + tag "modules" + tag "modules_bigbio" + tag "modules_pridepy" + tag "pridepy" + + test("Should run stub mode") { + + options "-stub" + + when { + process { + """ + input[0] = [ id: 'PXD001819' ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out.versions).match("versions_stub") + } + } +} diff --git a/modules/bigbio/pridepy/tests/main.nf.test.snap b/modules/bigbio/pridepy/tests/main.nf.test.snap new file mode 100644 index 0000000..7b690e6 --- /dev/null +++ b/modules/bigbio/pridepy/tests/main.nf.test.snap @@ -0,0 +1,14 @@ +{ + "versions_stub": { + "content": [ + [ + "versions.yml:md5,c27ccb04c91f92f188b7f4cf2d54ed49" + ] + ], + "timestamp": "2026-04-28T14:06:27.317530521", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/modules/bigbio/pridepy/tests/nextflow.config b/modules/bigbio/pridepy/tests/nextflow.config new file mode 100644 index 0000000..0293c16 --- /dev/null +++ b/modules/bigbio/pridepy/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } +} From 0fd0a1979541d15bab4bab31470313e83cdb6039 Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Tue, 28 Apr 2026 07:47:24 +0100 Subject: [PATCH 2/5] Update modules/bigbio/pridepy/main.nf Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- modules/bigbio/pridepy/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/bigbio/pridepy/main.nf b/modules/bigbio/pridepy/main.nf index 300a0d2..37c08ab 100644 --- a/modules/bigbio/pridepy/main.nf +++ b/modules/bigbio/pridepy/main.nf @@ -26,7 +26,7 @@ process PRIDEPY_DOWNLOAD { cat <<-END_VERSIONS > versions.yml "${task.process}": - pridepy: \$(pip show pridepy 2>/dev/null | grep Version | cut -d' ' -f2) + pridepy: \$(python -c "from importlib.metadata import version; print(version('pridepy'))" || echo "unknown") END_VERSIONS """ From 50df9f4ebb3c20e62e0d6ece8178b3afec169fe4 Mon Sep 17 00:00:00 2001 From: Shen-YuFei Date: Tue, 28 Apr 2026 14:51:18 +0800 Subject: [PATCH 3/5] fix(pridepy): address copilot review feedback --- modules/bigbio/pridepy/main.nf | 4 ++-- modules/bigbio/pridepy/meta.yml | 23 +++++++++++++---------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/modules/bigbio/pridepy/main.nf b/modules/bigbio/pridepy/main.nf index 37c08ab..c5f23aa 100644 --- a/modules/bigbio/pridepy/main.nf +++ b/modules/bigbio/pridepy/main.nf @@ -12,8 +12,8 @@ process PRIDEPY_DOWNLOAD { val(meta) output: - path "output/", emit: download_dir, optional: true - path "versions.yml", emit: versions + tuple val(meta), path("output/"), emit: download_dir + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/bigbio/pridepy/meta.yml b/modules/bigbio/pridepy/meta.yml index 63ada85..588fddf 100644 --- a/modules/bigbio/pridepy/meta.yml +++ b/modules/bigbio/pridepy/meta.yml @@ -17,22 +17,25 @@ tools: homepage: https://github.com/PRIDE-Archive/pridepy documentation: https://github.com/PRIDE-Archive/pridepy tool_dev_url: https://github.com/PRIDE-Archive/pridepy - doi: "" licence: - "Apache-2.0" identifier: "" input: - - - meta: - type: map - description: | - Groovy Map containing task information. - e.g. `[ id: 'PXD001819' ]` + - meta: + type: map + description: | + Groovy Map containing task information. + e.g. `[ id: 'PXD001819' ]` output: download_dir: - - "output/": - type: directory - description: Directory containing downloaded files (for download sub-commands) - pattern: "output/" + - - meta: + type: map + description: | + Groovy Map containing task information forwarded from input. + - "output/": + type: directory + description: Directory containing downloaded files (for download sub-commands) + pattern: "output/" versions: - versions.yml: type: file From 7328506e3bbe28a52932fe093c811ea1b260b114 Mon Sep 17 00:00:00 2001 From: Shen-YuFei Date: Tue, 28 Apr 2026 16:02:12 +0800 Subject: [PATCH 4/5] fix(pridepy): cd into output dir and quote stub paths --- modules/bigbio/pridepy/main.nf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/bigbio/pridepy/main.nf b/modules/bigbio/pridepy/main.nf index c5f23aa..6807f8c 100644 --- a/modules/bigbio/pridepy/main.nf +++ b/modules/bigbio/pridepy/main.nf @@ -22,7 +22,10 @@ process PRIDEPY_DOWNLOAD { def args = task.ext.args ?: '' """ mkdir -p output - pridepy ${args} + ( + cd output + pridepy ${args} + ) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -33,7 +36,7 @@ process PRIDEPY_DOWNLOAD { stub: """ mkdir -p output - touch output/${meta.id}.placeholder + touch "output/${meta.id}.placeholder" cat <<-END_VERSIONS > versions.yml "${task.process}": From 2694907ec41be68998dab141be326f421e71eba6 Mon Sep 17 00:00:00 2001 From: Shen-YuFei Date: Tue, 28 Apr 2026 18:05:41 +0800 Subject: [PATCH 5/5] fix(pridepy): drop output/ subdir, write to workDir --- modules/bigbio/pridepy/main.nf | 17 ++++---- modules/bigbio/pridepy/meta.yml | 23 ++++++++--- modules/bigbio/pridepy/tests/main.nf.test | 37 ++++++++++++++++- .../bigbio/pridepy/tests/main.nf.test.snap | 40 ++++++++++++++++++- modules/bigbio/pridepy/tests/nextflow.config | 4 ++ 5 files changed, 103 insertions(+), 18 deletions(-) diff --git a/modules/bigbio/pridepy/main.nf b/modules/bigbio/pridepy/main.nf index 6807f8c..57a54ea 100644 --- a/modules/bigbio/pridepy/main.nf +++ b/modules/bigbio/pridepy/main.nf @@ -12,8 +12,9 @@ process PRIDEPY_DOWNLOAD { val(meta) output: - tuple val(meta), path("output/"), emit: download_dir - path "versions.yml", emit: versions + tuple val(meta), path("*.{raw,mzML,mzML.gz,mgf,d.tar,d.tar.gz,d.zip,dia,wiff,wiff.scan}"), emit: spectra, optional: true + tuple val(meta), path("*-checksum.tsv"), emit: checksums, optional: true + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -21,11 +22,7 @@ process PRIDEPY_DOWNLOAD { script: def args = task.ext.args ?: '' """ - mkdir -p output - ( - cd output - pridepy ${args} - ) + pridepy ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -34,9 +31,11 @@ process PRIDEPY_DOWNLOAD { """ stub: + def args = task.ext.args ?: '' + def checksum_touch = args.contains('--checksum-check') ? "touch \"${meta.id}-checksum.tsv\"" : '' """ - mkdir -p output - touch "output/${meta.id}.placeholder" + touch "${meta.id}.raw" + ${checksum_touch} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/bigbio/pridepy/meta.yml b/modules/bigbio/pridepy/meta.yml index 588fddf..6570a05 100644 --- a/modules/bigbio/pridepy/meta.yml +++ b/modules/bigbio/pridepy/meta.yml @@ -27,15 +27,28 @@ input: Groovy Map containing task information. e.g. `[ id: 'PXD001819' ]` output: - download_dir: + spectra: - - meta: type: map description: | Groovy Map containing task information forwarded from input. - - "output/": - type: directory - description: Directory containing downloaded files (for download sub-commands) - pattern: "output/" + - "*.{raw,mzML,mzML.gz,mgf,d.tar,d.tar.gz,d.zip,dia,wiff,wiff.scan}": + type: file + description: | + Mass spectrometry spectra files downloaded from PRIDE Archive + (Thermo .raw, mzML, Bruker .d archives, DIA-NN .dia, Sciex .wiff, etc.). + Emitted to the task workDir; downstream pipelines pick specific files + via publishDir / saveAs in their modules.config. + pattern: "*.{raw,mzML,mzML.gz,mgf,d.tar,d.tar.gz,d.zip,dia,wiff,wiff.scan}" + checksums: + - - meta: + type: map + description: | + Groovy Map containing task information forwarded from input. + - "*-checksum.tsv": + type: file + description: SHA1 checksums file generated by pridepy when --checksum-check is enabled. + pattern: "*-checksum.tsv" versions: - versions.yml: type: file diff --git a/modules/bigbio/pridepy/tests/main.nf.test b/modules/bigbio/pridepy/tests/main.nf.test index 682f42f..1bd59a5 100644 --- a/modules/bigbio/pridepy/tests/main.nf.test +++ b/modules/bigbio/pridepy/tests/main.nf.test @@ -8,7 +8,7 @@ nextflow_process { tag "modules_pridepy" tag "pridepy" - test("Should run stub mode") { + test("Should run stub mode (no checksum)") { options "-stub" @@ -22,7 +22,40 @@ nextflow_process { then { assert process.success - assert snapshot(process.out.versions).match("versions_stub") + assert process.out.spectra.size() == 1 + assert process.out.checksums.size() == 0 + assert snapshot( + process.out.spectra, + process.out.versions + ).match("stub_no_checksum") + } + } + + test("Should run stub mode with --checksum-check") { + + options "-stub" + + when { + process { + """ + input[0] = [ id: 'PXD001819' ] + """ + } + params { + module_args = '--checksum-check' + } + config "./nextflow.config" + } + + then { + assert process.success + assert process.out.spectra.size() == 1 + assert process.out.checksums.size() == 1 + assert snapshot( + process.out.spectra, + process.out.checksums, + process.out.versions + ).match("stub_with_checksum") } } } diff --git a/modules/bigbio/pridepy/tests/main.nf.test.snap b/modules/bigbio/pridepy/tests/main.nf.test.snap index 7b690e6..c591913 100644 --- a/modules/bigbio/pridepy/tests/main.nf.test.snap +++ b/modules/bigbio/pridepy/tests/main.nf.test.snap @@ -1,11 +1,47 @@ { - "versions_stub": { + "stub_no_checksum": { "content": [ + [ + [ + { + "id": "PXD001819" + }, + "PXD001819.raw:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + "versions.yml:md5,c27ccb04c91f92f188b7f4cf2d54ed49" + ] + ], + "timestamp": "2026-04-28T18:15:37.876014654", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "stub_with_checksum": { + "content": [ + [ + [ + { + "id": "PXD001819" + }, + "PXD001819.raw:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + [ + { + "id": "PXD001819" + }, + "PXD001819-checksum.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], [ "versions.yml:md5,c27ccb04c91f92f188b7f4cf2d54ed49" ] ], - "timestamp": "2026-04-28T14:06:27.317530521", + "timestamp": "2026-04-28T18:15:43.996367071", "meta": { "nf-test": "0.9.5", "nextflow": "25.10.4" diff --git a/modules/bigbio/pridepy/tests/nextflow.config b/modules/bigbio/pridepy/tests/nextflow.config index 0293c16..266db68 100644 --- a/modules/bigbio/pridepy/tests/nextflow.config +++ b/modules/bigbio/pridepy/tests/nextflow.config @@ -1,3 +1,7 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: 'PRIDEPY_DOWNLOAD' { + ext.args = { params.module_args ?: '' } + } }