Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,15 @@ process {
memory = { 10.GB * task.attempt, 'memory' }
time = { 16.h * task.attempt, 'time' }
}
withName: 'PRIDEPY_FETCH_SDRF' {
errorStrategy = 'retry'
maxRetries = 3
}
withName: 'PRIDEPY_DOWNLOAD_FILE' {
errorStrategy = 'retry'
maxRetries = 3
maxForks = 5
}
withLabel: process_gpu {
ext.use_gpu = { workflow.profile.contains('gpu') }
accelerator = { workflow.profile.contains('gpu') ? 1 : null }
Expand Down
21 changes: 21 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,27 @@ process {

process {

withName: 'SDRF_PIPELINES_PARSE_SDRF' {
publishDir = [
path: {"${params.outdir}/sdrf"},
mode: params.publish_dir_mode,
pattern: '*.tsv'
]
}

withName: 'PRIDEPY_FETCH_SDRF' {
publishDir = [
path: {"${params.outdir}/sdrf"},
mode: params.publish_dir_mode,
pattern: '*.sdrf.tsv'
]
}

withName: 'PRIDEPY_DOWNLOAD_FILE' {
ext.args = '-p ftp'
publishDir = [enabled: false]
}

withName: 'TDF2MZML' {
publishDir = [
enabled: false
Expand Down
33 changes: 33 additions & 0 deletions conf/test_sdrf.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Nextflow config file for running SDRF/PRIDE input tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Defines input files and everything required to run a minimal test using an SDRF
    sheet as --input. The pipeline fetches the RAW files from PRIDE and converts the
    SDRF to a samplesheet + search presets before running the standard workflow.

    Use as follows:
        nextflow run nf-core/mhcquant -profile test_sdrf,<docker/singularity> --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/

// Cap per-process resources so every task fits on minimal CI runners.
process {
    resourceLimits = [
        cpus: 2,
        memory: '6.GB',
        time: '2.h'
    ]
}

params {
    config_profile_name        = 'SDRF test profile'
    config_profile_description = 'Minimal SDRF input test dataset to check pipeline function'

    // Input data: an SDRF sheet (raw files are fetched from PRIDE at runtime)
    // and a reduced 500-protein FASTA for a fast search.
    input = params.pipelines_testdata_base_path + 'mhcquant/testdata/PXD009752.sdrf.tsv'
    fasta = params.pipelines_testdata_base_path + 'mhcquant/testdata/UP000005640_9606_500prot.fasta'

    // Batch spectra during Comet search to fit within CI memory limits
    spectrum_batch_size = 20000
}
15 changes: 15 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,21 @@

> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._

## Input modes

The `--input` parameter accepts three formats:

| Mode | Example | Description |
| ------------------- | ----------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **Samplesheet TSV** | `--input samplesheet.tsv` | A local TSV file listing your MS runs (see [Samplesheet input](#samplesheet-input)). |
| **SDRF file** | `--input experiment.sdrf.tsv` | A local [SDRF-Proteomics](https://github.com/bigbio/proteomics-sample-metadata) file following the [immunopeptidomics template](https://github.com/bigbio/proteomics-sample-metadata/tree/master/templates). Raw files are fetched from PRIDE, search settings and sample metadata are parsed from the SDRF. Requires `--fasta`. |
| **PRIDE accession** | `--input PXD009752` | A PRIDE project accession. The project must include an SDRF file following the [immunopeptidomics template](https://github.com/bigbio/proteomics-sample-metadata/tree/master/templates); both the SDRF and raw files are fetched from PRIDE. Requires `--fasta`. |

For the SDRF and PRIDE accession modes, the pipeline uses [sdrf-pipelines](https://github.com/bigbio/sdrf-pipelines) to translate the SDRF into an mhcquant samplesheet and a search-preset table, then downloads the raw files with [pridepy](https://github.com/PRIDE-Archive/pridepy). The generated samplesheet and presets are published under `<outdir>/sdrf/` for transparency.

> [!NOTE]
> SDRF files must follow the immunopeptidomics template from [bigbio/proteomics-sample-metadata](https://github.com/bigbio/proteomics-sample-metadata/tree/master/templates), and PRIDE accessions must point to a project that contains such an SDRF file — otherwise sample metadata and search parameters cannot be derived. When providing a local `.sdrf.tsv`, the PRIDE accession is inferred from the filename (e.g. `PXD009752.sdrf.tsv`); if your SDRF is named differently, pass the accession via `--input PXD...` instead.

## Samplesheet input

You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a tab-separated file with at least four columns, and a header row as shown in the examples below.
Expand Down
2 changes: 1 addition & 1 deletion modules/local/openmsthirdparty/percolatoradapter/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ process OPENMS_PERCOLATORADAPTER {
tuple val(meta), path("*.idXML") , emit: idxml
tuple val(meta), path("*_percolator_feature_weights.tsv"), emit: feature_weights, optional: true
tuple val("${task.process}"), val('PercolatorAdapter'), eval("PercolatorAdapter 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1"), topic: versions
tuple val("${task.process}"), val('percolator'), eval("percolator -h 2>&1 | grep -E '^Percolator version(.*)' | sed 's/Percolator version //g'"), topic: versions
tuple val("${task.process}"), val('percolator'), eval("percolator -h 2>&1 | grep -E '^Percolator version(.*)' | sed 's/Percolator version //g' | cut -d',' -f1"), topic: versions

when:
task.ext.when == null || task.ext.when
Expand Down
5 changes: 5 additions & 0 deletions modules/local/pridepy/download_file/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
channels:
- conda-forge
- bioconda
dependencies:
- bioconda::pridepy=0.0.12
37 changes: 37 additions & 0 deletions modules/local/pridepy/download_file/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Downloads a single raw file from a PRIDE Archive project by its exact file name.
// The transfer protocol (e.g. '-p ftp') is supplied via task.ext.args (see
// conf/modules.config); retry/parallelism limits live in conf/base.config.
process PRIDEPY_DOWNLOAD_FILE {
    label 'process_single'
    tag "${file_name}"

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/pridepy:0.0.12--pyhdfd78af_0' :
        'quay.io/biocontainers/pridepy:0.0.12--pyhdfd78af_0' }"

    input:
    // meta: sample metadata map; file_name: name as listed in the PRIDE project;
    // pride_accession: project accession, e.g. PXD009752
    tuple val(meta), val(file_name), val(pride_accession)

    output:
    tuple val(meta), path("${file_name}"), emit: downloaded_file
    // Anchor to '^Version' so only pip show's Version field can ever match.
    tuple val("${task.process}"), val('pridepy'), eval("pip show pridepy 2>/dev/null | grep '^Version' | cut -d' ' -f2"), topic: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    """
    pridepy download-file-by-name \\
        -a "${pride_accession}" \\
        -f "${file_name}" \\
        -o . \\
        ${args}

    # pridepy exits 0 even on download failure — validate file exists and is non-empty
    [ -s "${file_name}" ] || { echo "ERROR: Downloaded file ${file_name} is missing or empty" >&2; exit 1; }
    """

    stub:
    """
    touch "${file_name}"
    """
}
54 changes: 54 additions & 0 deletions modules/local/pridepy/download_file/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
name: pridepy_download_file
description: Download a single file from the PRIDE Archive by name using pridepy.
keywords:
- pride
- download
- proteomics
- mass spectrometry
tools:
- pridepy:
description: |
Python client library and command line tool for the PRIDE Archive REST API.
Supports downloading files from PRIDE datasets by accession and file name.
homepage: https://github.com/PRIDE-Archive/pridepy
documentation: https://github.com/PRIDE-Archive/pridepy
licence: ["Apache-2.0"]
input:
- - meta:
type: map
description: |
Groovy Map containing sample information.
e.g. [ id:'test', sample:'sample1', condition:'A', search_preset:'default' ]
- file_name:
type: string
description: Name of the file to download from the PRIDE Archive dataset.
- pride_accession:
type: string
description: PRIDE Archive accession number (e.g. PXD000001) identifying the dataset.
output:
downloaded_file:
- - meta:
type: map
description: |
Groovy Map containing sample information.
e.g. [ id:'test', sample:'sample1', condition:'A', search_preset:'default' ]
- "${file_name}":
type: file
description: The file downloaded from the PRIDE Archive.
pattern: "*"
topics:
versions:
- - ${task.process}:
type: string
description: The process the versions were collected from
- pridepy:
type: string
description: The tool name
- "pip show pridepy 2>/dev/null | grep Version | cut -d' ' -f2":
type: eval
description: The expression to obtain the version of pridepy

authors:
- "@jonasscheid"
maintainers:
- "@jonasscheid"
5 changes: 5 additions & 0 deletions modules/local/pridepy/fetch_sdrf/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
channels:
- conda-forge
- bioconda
dependencies:
- bioconda::pridepy=0.0.12
31 changes: 31 additions & 0 deletions modules/local/pridepy/fetch_sdrf/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Fetches the SDRF metadata sheet of a PRIDE project: lists the project's file
// metadata, selects the first file whose name ends in '.sdrf.tsv', and downloads
// it via FTP. The inline python exits non-zero (failing the task) when the
// project contains no SDRF file, so downstream SDRF parsing never sees a
// project without experimental-design metadata.
process PRIDEPY_FETCH_SDRF {
    label 'process_single'
    tag "$pride_id"

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/pridepy:0.0.12--pyhdfd78af_0' :
        'quay.io/biocontainers/pridepy:0.0.12--pyhdfd78af_0' }"

    input:
    // pride_id: PRIDE project accession, e.g. PXD009752
    val pride_id

    output:
    // Emits the downloaded SDRF sheet; name is taken from the project's file listing.
    path "*.sdrf.tsv" , emit: sdrf
    tuple val("${task.process}"), val('pridepy'), eval("pip show pridepy 2>/dev/null | grep Version | cut -d' ' -f2"), topic: versions

    when:
    task.ext.when == null || task.ext.when

    // Step 1: dump the project's file metadata as JSON.
    // Step 2: pick the first '*.sdrf.tsv' entry, or abort with an error message.
    // Step 3: download that file by name (FTP protocol).
    script:
    """
    pridepy stream-files-metadata -a "${pride_id}" -o files_metadata.json
    sdrf_name=\$(python3 -c "import json,sys; sdrfs=[f['fileName'] for f in json.load(open('files_metadata.json')) if f['fileName'].endswith('.sdrf.tsv')]; print(sdrfs[0]) if sdrfs else sys.exit('ERROR: No SDRF file found for ${pride_id}')")
    pridepy download-file-by-name -a "${pride_id}" -f "\$sdrf_name" -o . -p ftp
    """

    stub:
    """
    touch ${pride_id}.sdrf.tsv
    """
}
41 changes: 41 additions & 0 deletions modules/local/pridepy/fetch_sdrf/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
name: pridepy_fetch_sdrf
description: Fetch an SDRF file from the PRIDE Archive for a given project accession.
keywords:
- pride
- sdrf
- proteomics
- download
tools:
- pridepy:
description: |
Python package to access PRIDE Archive data programmatically,
including downloading files and metadata for public proteomics datasets.
homepage: https://github.com/PRIDE-Archive/pridepy
documentation: https://github.com/PRIDE-Archive/pridepy
licence: ["Apache-2.0"]
input:
- - pride_id:
type: string
description: PRIDE Archive project accession (e.g. PXD009752)
output:
sdrf:
- - "*.sdrf.tsv":
type: file
description: SDRF (Sample and Data Relationship Format) file describing the experimental design of the PRIDE project
pattern: "*.sdrf.tsv"
topics:
versions:
- - ${task.process}:
type: string
description: The process the versions were collected from
- pridepy:
type: string
description: The tool name
- "pip show pridepy 2>/dev/null | grep Version | cut -d' ' -f2":
type: eval
description: The expression to obtain the version of the tool

authors:
- "@jonasscheid"
maintainers:
- "@jonasscheid"
5 changes: 5 additions & 0 deletions modules/local/sdrf_pipelines/parse_sdrf/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
channels:
- conda-forge
- bioconda
dependencies:
- bioconda::sdrf-pipelines=0.1.2
34 changes: 34 additions & 0 deletions modules/local/sdrf_pipelines/parse_sdrf/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Converts an SDRF experimental-design sheet into an mhcquant samplesheet plus a
// search-presets table using sdrf-pipelines' 'parse_sdrf convert-mhcquant'.
process SDRF_PIPELINES_PARSE_SDRF {
    label 'process_single'
    tag "${sdrf.baseName}"

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.1.2--pyhdfd78af_0' :
        'quay.io/biocontainers/sdrf-pipelines:0.1.2--pyhdfd78af_0' }"

    input:
    // sdrf: SDRF-Proteomics TSV describing samples and search settings
    path sdrf

    output:
    // samplesheet.tsv: mhcquant-compatible run sheet derived from the SDRF
    path "samplesheet.tsv" , emit: samplesheet
    // search_presets.tsv: one row per unique search configuration in the SDRF
    path "search_presets.tsv" , emit: search_presets
    tuple val("${task.process}"), val('sdrf-pipelines'), eval("parse_sdrf --version | cut -d ' ' -f 2"), topic: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    """
    parse_sdrf convert-mhcquant \\
        -s ${sdrf} \\
        -os samplesheet.tsv \\
        -op search_presets.tsv
    """

    stub:
    """
    touch samplesheet.tsv
    touch search_presets.tsv
    """
}
57 changes: 57 additions & 0 deletions modules/local/sdrf_pipelines/parse_sdrf/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
name: sdrf_pipelines_parse_sdrf
description: Converts an SDRF (Sample and Data Relationship Format) file into an
mhcquant-compatible samplesheet and search presets TSV files using sdrf-pipelines.
keywords:
- sdrf
- samplesheet
- proteomics
- immunopeptidomics
- mhcquant
tools:
- "sdrf-pipelines":
description: "A set of tools to validate and convert SDRF files for proteomics
pipelines."
homepage: "https://github.com/bigbio/sdrf-pipelines"
documentation: "https://github.com/bigbio/sdrf-pipelines"
tool_dev_url: "https://github.com/bigbio/sdrf-pipelines"
doi: "10.1021/acs.jproteome.1c00505"
licence: ["Apache-2.0"]
identifier: ""

input:
- - sdrf:
type: file
description: SDRF file describing the experimental design and sample metadata.
pattern: "*.{tsv,sdrf.tsv}"
ontologies: []

output:
samplesheet:
- - "samplesheet.tsv":
type: file
description: mhcquant-compatible samplesheet derived from the SDRF file.
pattern: "samplesheet.tsv"
ontologies: []
search_presets:
- - "search_presets.tsv":
type: file
description: Search parameter presets derived from the SDRF file, one row
per unique search configuration.
pattern: "search_presets.tsv"
ontologies: []
topics:
versions:
- - ${task.process}:
type: string
description: The name of the process
- sdrf-pipelines:
type: string
description: The name of the tool
- "parse_sdrf --version | cut -d ' ' -f 2":
type: eval
description: The expression to obtain the version of the tool

authors:
- "@jonasscheid"
maintainers:
- "@jonasscheid"
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ profiles {
singularity.runOptions = '--nv'
}
test { includeConfig 'conf/test.config' }
test_sdrf { includeConfig 'conf/test_sdrf.config' }
test_mokapot { includeConfig 'conf/test_mokapot.config' }
test_percolator { includeConfig 'conf/test_percolator.config' }
test_ionannotator { includeConfig 'conf/test_ionannotator.config' }
Expand Down
Loading
Loading