Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,15 @@ process {
memory = { 10.GB * task.attempt, 'memory' }
time = { 16.h * task.attempt, 'time' }
}
withName: 'PRIDEPY_FETCH_SDRF' {
errorStrategy = 'retry'
maxRetries = 3
}
withName: 'PRIDEPY_DOWNLOAD_FILE' {
errorStrategy = 'retry'
maxRetries = 3
maxForks = 5
}
withLabel: process_gpu {
ext.use_gpu = { workflow.profile.contains('gpu') }
accelerator = { workflow.profile.contains('gpu') ? 1 : null }
Expand Down
21 changes: 21 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,27 @@ process {

process {

withName: 'SDRF_PIPELINES_PARSE_SDRF' {
publishDir = [
path: {"${params.outdir}/sdrf"},
mode: params.publish_dir_mode,
pattern: '*.tsv'
]
}

withName: 'PRIDEPY_FETCH_SDRF' {
publishDir = [
path: {"${params.outdir}/sdrf"},
mode: params.publish_dir_mode,
pattern: '*.sdrf.tsv'
]
}

withName: 'PRIDEPY_DOWNLOAD_FILE' {
ext.args = '-p ftp'
publishDir = [enabled: false]
}

withName: 'TDF2MZML' {
publishDir = [
enabled: false
Expand Down
33 changes: 33 additions & 0 deletions conf/test_sdrf.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Nextflow config file for running SDRF/PRIDE input tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Defines input files and everything required to run a minimal test using an SDRF
    sheet as --input. The pipeline fetches the RAW files from PRIDE and converts the
    SDRF to a samplesheet + search presets before running the standard workflow.

    Use as follows:
        nextflow run nf-core/mhcquant -profile test_sdrf,<docker/singularity> --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/

// Cap per-process resources so every task fits on minimal CI runners.
process {
    resourceLimits = [
        cpus: 2,
        memory: '6.GB',
        time: '2.h'
    ]
}

params {
    config_profile_name        = 'SDRF test profile'
    config_profile_description = 'Minimal SDRF input test dataset to check pipeline function'

    // Input data: an SDRF sheet (raw files are fetched from PRIDE at runtime)
    // and a reduced 500-protein FASTA for a fast search.
    input = params.pipelines_testdata_base_path + 'mhcquant/testdata/PXD009752.sdrf.tsv'
    fasta = params.pipelines_testdata_base_path + 'mhcquant/testdata/UP000005640_9606_500prot.fasta'

    // Batch spectra during Comet search to fit within CI memory limits
    spectrum_batch_size = 20000
}
15 changes: 15 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,21 @@

> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._

## Input modes

The `--input` parameter accepts three formats:

| Mode | Example | Description |
| ------------------- | ----------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **Samplesheet TSV** | `--input samplesheet.tsv` | A local TSV file listing your MS runs (see [Samplesheet input](#samplesheet-input)). |
| **SDRF file** | `--input experiment.sdrf.tsv` | A local [SDRF-Proteomics](https://github.com/bigbio/proteomics-sample-metadata) file following the [immunopeptidomics template](https://github.com/bigbio/proteomics-sample-metadata/tree/master/templates). Raw files are fetched from PRIDE, search settings and sample metadata are parsed from the SDRF. Requires `--fasta`. |
| **PRIDE accession** | `--input PXD009752` | A PRIDE project accession. The project must include an SDRF file following the [immunopeptidomics template](https://github.com/bigbio/proteomics-sample-metadata/tree/master/templates); both the SDRF and raw files are fetched from PRIDE. Requires `--fasta`. |

For the SDRF and PRIDE accession modes, the pipeline uses [sdrf-pipelines](https://github.com/bigbio/sdrf-pipelines) to translate the SDRF into an mhcquant samplesheet and a search-preset table, then downloads the raw files with [pridepy](https://github.com/PRIDE-Archive/pridepy). The generated samplesheet and presets are published under `<outdir>/sdrf/` for transparency.

> [!NOTE]
> SDRF files must follow the immunopeptidomics template from [bigbio/proteomics-sample-metadata](https://github.com/bigbio/proteomics-sample-metadata/tree/master/templates), and PRIDE accessions must point to a project that contains such an SDRF file — otherwise sample metadata and search parameters cannot be derived. When providing a local `.sdrf.tsv`, the PRIDE accession is inferred from the filename (e.g. `PXD009752.sdrf.tsv`); if your SDRF is named differently, pass the accession via `--input PXD...` instead.

## Samplesheet input

You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a tab-separated file with at least four columns, and a header row as shown in the examples below.
Expand Down
2 changes: 1 addition & 1 deletion modules/local/openmsthirdparty/percolatoradapter/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ process OPENMS_PERCOLATORADAPTER {
tuple val(meta), path("*.idXML") , emit: idxml
tuple val(meta), path("*_percolator_feature_weights.tsv"), emit: feature_weights, optional: true
tuple val("${task.process}"), val('PercolatorAdapter'), eval("PercolatorAdapter 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1"), topic: versions
tuple val("${task.process}"), val('percolator'), eval("percolator -h 2>&1 | grep -E '^Percolator version(.*)' | sed 's/Percolator version //g'"), topic: versions
tuple val("${task.process}"), val('percolator'), eval("percolator -h 2>&1 | grep -E '^Percolator version(.*)' | sed 's/Percolator version //g' | cut -d',' -f1"), topic: versions

when:
task.ext.when == null || task.ext.when
Expand Down
5 changes: 5 additions & 0 deletions modules/local/pridepy/download_file/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
channels:
- conda-forge
- bioconda
dependencies:
- bioconda::pridepy=0.0.12
37 changes: 37 additions & 0 deletions modules/local/pridepy/download_file/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Downloads a single raw file from a PRIDE Archive project by its exact file name.
// The transfer protocol (e.g. '-p ftp') is supplied via task.ext.args (see
// conf/modules.config); retry/parallelism limits live in conf/base.config.
process PRIDEPY_DOWNLOAD_FILE {
    label 'process_single'
    tag "${file_name}"

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/pridepy:0.0.12--pyhdfd78af_0' :
        'quay.io/biocontainers/pridepy:0.0.12--pyhdfd78af_0' }"

    input:
    // meta: sample metadata map; file_name: name as listed in the PRIDE project;
    // pride_accession: project accession, e.g. PXD009752
    tuple val(meta), val(file_name), val(pride_accession)

    output:
    tuple val(meta), path("${file_name}"), emit: downloaded_file
    // Anchor to '^Version' so only pip show's Version field can ever match.
    tuple val("${task.process}"), val('pridepy'), eval("pip show pridepy 2>/dev/null | grep '^Version' | cut -d' ' -f2"), topic: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    """
    pridepy download-file-by-name \\
        -a "${pride_accession}" \\
        -f "${file_name}" \\
        -o . \\
        ${args}

    # pridepy exits 0 even on download failure — validate file exists and is non-empty
    [ -s "${file_name}" ] || { echo "ERROR: Downloaded file ${file_name} is missing or empty" >&2; exit 1; }
    """

    stub:
    """
    touch "${file_name}"
    """
}
54 changes: 54 additions & 0 deletions modules/local/pridepy/download_file/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
name: pridepy_download_file
description: Download a single file from the PRIDE Archive by name using pridepy.
keywords:
- pride
- download
- proteomics
- mass spectrometry
tools:
- pridepy:
description: |
Python client library and command line tool for the PRIDE Archive REST API.
Supports downloading files from PRIDE datasets by accession and file name.
homepage: https://github.com/PRIDE-Archive/pridepy
documentation: https://github.com/PRIDE-Archive/pridepy
licence: ["Apache-2.0"]
input:
- - meta:
type: map
description: |
Groovy Map containing sample information.
e.g. [ id:'test', sample:'sample1', condition:'A', search_preset:'default' ]
- file_name:
type: string
description: Name of the file to download from the PRIDE Archive dataset.
- pride_accession:
type: string
description: PRIDE Archive accession number (e.g. PXD000001) identifying the dataset.
output:
downloaded_file:
- - meta:
type: map
description: |
Groovy Map containing sample information.
e.g. [ id:'test', sample:'sample1', condition:'A', search_preset:'default' ]
- "${file_name}":
type: file
description: The file downloaded from the PRIDE Archive.
pattern: "*"
topics:
versions:
- - ${task.process}:
type: string
description: The process the versions were collected from
- pridepy:
type: string
description: The tool name
- "pip show pridepy 2>/dev/null | grep Version | cut -d' ' -f2":
type: eval
description: The expression to obtain the version of pridepy

authors:
- "@jonasscheid"
maintainers:
- "@jonasscheid"
5 changes: 5 additions & 0 deletions modules/local/pridepy/fetch_sdrf/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
channels:
- conda-forge
- bioconda
dependencies:
- bioconda::pridepy=0.0.12
31 changes: 31 additions & 0 deletions modules/local/pridepy/fetch_sdrf/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Fetches the SDRF metadata sheet of a PRIDE project: lists the project's file
// metadata, selects the first file whose name ends in '.sdrf.tsv', and downloads
// it via FTP. The inline python exits non-zero (failing the task) when the
// project contains no SDRF file, so downstream SDRF parsing never sees a
// project without experimental-design metadata.
process PRIDEPY_FETCH_SDRF {
    label 'process_single'
    tag "$pride_id"

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/pridepy:0.0.12--pyhdfd78af_0' :
        'quay.io/biocontainers/pridepy:0.0.12--pyhdfd78af_0' }"

    input:
    // pride_id: PRIDE project accession, e.g. PXD009752
    val pride_id

    output:
    // Emits the downloaded SDRF sheet; name is taken from the project's file listing.
    path "*.sdrf.tsv" , emit: sdrf
    tuple val("${task.process}"), val('pridepy'), eval("pip show pridepy 2>/dev/null | grep Version | cut -d' ' -f2"), topic: versions

    when:
    task.ext.when == null || task.ext.when

    // Step 1: dump the project's file metadata as JSON.
    // Step 2: pick the first '*.sdrf.tsv' entry, or abort with an error message.
    // Step 3: download that file by name (FTP protocol).
    script:
    """
    pridepy stream-files-metadata -a "${pride_id}" -o files_metadata.json
    sdrf_name=\$(python3 -c "import json,sys; sdrfs=[f['fileName'] for f in json.load(open('files_metadata.json')) if f['fileName'].endswith('.sdrf.tsv')]; print(sdrfs[0]) if sdrfs else sys.exit('ERROR: No SDRF file found for ${pride_id}')")
    pridepy download-file-by-name -a "${pride_id}" -f "\$sdrf_name" -o . -p ftp
    """

    stub:
    """
    touch ${pride_id}.sdrf.tsv
    """
}
41 changes: 41 additions & 0 deletions modules/local/pridepy/fetch_sdrf/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
name: pridepy_fetch_sdrf
description: Fetch an SDRF file from the PRIDE Archive for a given project accession.
keywords:
- pride
- sdrf
- proteomics
- download
tools:
- pridepy:
description: |
Python package to access PRIDE Archive data programmatically,
including downloading files and metadata for public proteomics datasets.
homepage: https://github.com/PRIDE-Archive/pridepy
documentation: https://github.com/PRIDE-Archive/pridepy
licence: ["Apache-2.0"]
input:
- - pride_id:
type: string
description: PRIDE Archive project accession (e.g. PXD009752)
output:
sdrf:
- - "*.sdrf.tsv":
type: file
description: SDRF (Sample and Data Relationship Format) file describing the experimental design of the PRIDE project
pattern: "*.sdrf.tsv"
topics:
versions:
- - ${task.process}:
type: string
description: The process the versions were collected from
- pridepy:
type: string
description: The tool name
- "pip show pridepy 2>/dev/null | grep Version | cut -d' ' -f2":
type: eval
description: The expression to obtain the version of the tool

authors:
- "@jonasscheid"
maintainers:
- "@jonasscheid"
5 changes: 5 additions & 0 deletions modules/local/sdrf_pipelines/parse_sdrf/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
channels:
- conda-forge
- bioconda
dependencies:
- bioconda::sdrf-pipelines=0.1.2
34 changes: 34 additions & 0 deletions modules/local/sdrf_pipelines/parse_sdrf/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Converts an SDRF experimental-design sheet into an mhcquant samplesheet plus a
// search-presets table using sdrf-pipelines' 'parse_sdrf convert-mhcquant'.
process SDRF_PIPELINES_PARSE_SDRF {
    label 'process_single'
    tag "${sdrf.baseName}"

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.1.2--pyhdfd78af_0' :
        'quay.io/biocontainers/sdrf-pipelines:0.1.2--pyhdfd78af_0' }"

    input:
    // sdrf: SDRF-Proteomics TSV describing samples and search settings
    path sdrf

    output:
    // samplesheet.tsv: mhcquant-compatible run sheet derived from the SDRF
    path "samplesheet.tsv" , emit: samplesheet
    // search_presets.tsv: one row per unique search configuration in the SDRF
    path "search_presets.tsv" , emit: search_presets
    tuple val("${task.process}"), val('sdrf-pipelines'), eval("parse_sdrf --version | cut -d ' ' -f 2"), topic: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    """
    parse_sdrf convert-mhcquant \\
        -s ${sdrf} \\
        -os samplesheet.tsv \\
        -op search_presets.tsv
    """

    stub:
    """
    touch samplesheet.tsv
    touch search_presets.tsv
    """
}
57 changes: 57 additions & 0 deletions modules/local/sdrf_pipelines/parse_sdrf/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
name: sdrf_pipelines_parse_sdrf
description: Converts an SDRF (Sample and Data Relationship Format) file into an
mhcquant-compatible samplesheet and search presets TSV files using sdrf-pipelines.
keywords:
- sdrf
- samplesheet
- proteomics
- immunopeptidomics
- mhcquant
tools:
- "sdrf-pipelines":
description: "A set of tools to validate and convert SDRF files for proteomics
pipelines."
homepage: "https://github.com/bigbio/sdrf-pipelines"
documentation: "https://github.com/bigbio/sdrf-pipelines"
tool_dev_url: "https://github.com/bigbio/sdrf-pipelines"
doi: "10.1021/acs.jproteome.1c00505"
licence: ["Apache-2.0"]
identifier: ""

input:
- - sdrf:
type: file
description: SDRF file describing the experimental design and sample metadata.
pattern: "*.{tsv,sdrf.tsv}"
ontologies: []

output:
samplesheet:
- - "samplesheet.tsv":
type: file
description: mhcquant-compatible samplesheet derived from the SDRF file.
pattern: "samplesheet.tsv"
ontologies: []
search_presets:
- - "search_presets.tsv":
type: file
description: Search parameter presets derived from the SDRF file, one row
per unique search configuration.
pattern: "search_presets.tsv"
ontologies: []
topics:
versions:
- - ${task.process}:
type: string
description: The name of the process
- sdrf-pipelines:
type: string
description: The name of the tool
- "parse_sdrf --version | cut -d ' ' -f 2":
type: eval
description: The expression to obtain the version of the tool

authors:
- "@jonasscheid"
maintainers:
- "@jonasscheid"
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ profiles {
singularity.runOptions = '--nv'
}
test { includeConfig 'conf/test.config' }
test_sdrf { includeConfig 'conf/test_sdrf.config' }
test_mokapot { includeConfig 'conf/test_mokapot.config' }
test_percolator { includeConfig 'conf/test_percolator.config' }
test_ionannotator { includeConfig 'conf/test_ionannotator.config' }
Expand Down
Loading
Loading