diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 32eca5fd..5cd685fc 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -16,7 +16,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/bigbio/quant - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/bigbio/quantms/tree/master/.github/CONTRIBUTING.md) -- [ ] If necessary, also make a PR on the bigbio/quantms _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] If necessary, also make a PR on the `quantms` _branch_ of the [bigbio/quantms-test-datasets](https://github.com/bigbio/quantms-test-datasets) repository. - [ ] Make sure your code lints (`nf-core pipelines lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`).
diff --git a/conf/tests/test_full_dia.config b/conf/tests/test_full_dia.config index 61b53189..d67b9ad2 100644 --- a/conf/tests/test_full_dia.config +++ b/conf/tests/test_full_dia.config @@ -25,7 +25,7 @@ params { outdir = './results_dia_full' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata-aws/dia_full/PXD004684.sdrf.tsv' + input = 'https://raw.githubusercontent.com/bigbio/quantms-test-datasets/quantms/testdata-aws/dia_full/PXD004684.sdrf.tsv' database = 'https://raw.githubusercontent.com/bigbio/quantms-test-datasets/quantms/testdata/dia_ci/REF_EColi_K12_UPS1_combined.fasta' min_pr_mz = 450 max_pr_mz = 1080 diff --git a/conf/tests/test_full_lfq.config b/conf/tests/test_full_lfq.config index 4fd77979..3c5a8057 100644 --- a/conf/tests/test_full_lfq.config +++ b/conf/tests/test_full_lfq.config @@ -25,8 +25,8 @@ params { outdir = "./results_lfq_full" // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata-aws/lfq_full/PXD001819.sdrf.tsv' - database = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata-aws/lfq_full/yeast_2021_04_reviewed.fasta' + input = 'https://raw.githubusercontent.com/bigbio/quantms-test-datasets/quantms/testdata-aws/lfq_full/PXD001819.sdrf.tsv' + database = 'https://raw.githubusercontent.com/bigbio/quantms-test-datasets/quantms/testdata-aws/lfq_full/yeast_2021_04_reviewed.fasta' search_engines = "msgf,comet" add_decoys = true protein_level_fdr_cutoff = 0.01 diff --git a/conf/tests/test_full_tmt.config b/conf/tests/test_full_tmt.config index f834f1ac..fbf93358 100644 --- a/conf/tests/test_full_tmt.config +++ b/conf/tests/test_full_tmt.config @@ -25,8 +25,8 @@ params { outdir = "./results_iso_full" // Input data for full size test - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata-aws/tmt_full/PXD005486.sdrf.tsv' - database = 
'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata-aws/tmt_full/uniprot_E_coli_13spiked_reviewed_2021_04.fasta' + input = 'https://raw.githubusercontent.com/bigbio/quantms-test-datasets/quantms/testdata-aws/tmt_full/PXD005486.sdrf.tsv' + database = 'https://raw.githubusercontent.com/bigbio/quantms-test-datasets/quantms/testdata-aws/tmt_full/uniprot_E_coli_13spiked_reviewed_2021_04.fasta' search_engines = "comet,msgf" protein_level_fdr_cutoff = 0.01 psm_level_fdr_cutoff = 0.01 diff --git a/conf/tests/test_latest_dia.config b/conf/tests/test_latest_dia.config index eefa5ed8..11ec190b 100644 --- a/conf/tests/test_latest_dia.config +++ b/conf/tests/test_latest_dia.config @@ -40,7 +40,7 @@ process { } withLabel: diann { - container = 'ghcr.io/bigbio/diann:2.1.0' // This docker container is private in for quantms + container = 'ghcr.io/bigbio/diann:2.1.0' // This docker container is in a private registry for bigbio } resourceLimits = [ diff --git a/modules.json b/modules.json index 12808036..bdf0345a 100644 --- a/modules.json +++ b/modules.json @@ -12,8 +12,9 @@ }, "thermorawfileparser": { "branch": "main", - "git_sha": "a1a4a11ff508b2b5c23c9fb21c51c3327b748d4d", - "installed_by": ["modules"] + "git_sha": "53f2d78652a7040e3d44610286471642b1e1b55b", + "installed_by": ["modules"], + "patch": "modules/bigbio/thermorawfileparser/thermorawfileparser.diff" } } } diff --git a/modules/bigbio/thermorawfileparser/environment.yml b/modules/bigbio/thermorawfileparser/environment.yml index 63b8fc64..dbe6ffe5 100644 --- a/modules/bigbio/thermorawfileparser/environment.yml +++ b/modules/bigbio/thermorawfileparser/environment.yml @@ -1,4 +1,3 @@ -name: thermorawfileparser channels: - conda-forge - bioconda diff --git a/modules/bigbio/thermorawfileparser/main.nf b/modules/bigbio/thermorawfileparser/main.nf index 31ce4d0b..ac6a16f8 100644 --- a/modules/bigbio/thermorawfileparser/main.nf +++ b/modules/bigbio/thermorawfileparser/main.nf @@ -1,62 +1,67 @@ process 
THERMORAWFILEPARSER { - tag "$meta.mzml_id" + tag "${meta.mzml_id}" label 'process_low' label 'process_single' label 'error_retry' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/thermorawfileparser:1.4.5--h05cac1d_1' : - 'biocontainers/thermorawfileparser:1.4.5--h05cac1d_1' }" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/thermorawfileparser:1.4.5--h05cac1d_1' + : 'biocontainers/thermorawfileparser:1.4.5--h05cac1d_1'}" - stageInMode { - if (task.attempt == 1) { - if (task.executor == "awsbatch") { - 'symlink' - } else { - 'link' - } - } else if (task.attempt == 2) { - if (task.executor == "awsbatch") { - 'copy' - } else { - 'symlink' - } - } else { - 'copy' - } - } input: - tuple val(meta), path(rawfile) + tuple val(meta), path(raw) output: - tuple val(meta), path("*.{mzML,mgf,parquet}"), emit: convert_files - path "versions.yml", emit: versions - path "*.log", emit: log + tuple val(meta), path("*.{mzML,mzML.gz,mgf,mgf.gz,parquet,parquet.gz}"), emit: spectra + tuple val("${task.process}"), val('thermorawfileparser'), eval("ThermoRawFileParser.sh --version"), emit: versions_thermorawfileparser, topic: versions + path "*.log", emit: log + + when: + task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' + // Detect existing format options in any supported syntax: -f=2, -f 2, --format=2, + // or --format 2. + def hasFormatArg = (args =~ /(^|\s)(-f(=|\s)\d+|--format(=|\s)\d+)/).find() // Default to indexed mzML format (-f=2) if not specified in args - def formatArg = args.contains('-f=') ? '' : '-f=2' + def formatArg = hasFormatArg ? '' : '-f=2' + def prefix = task.ext.prefix ?: "${meta.mzml_id}" + def suffix = args.contains("--format 0") || args.contains("-f 0") + ? 
"mgf" + : args.contains("--format 1") || args.contains("-f 1") + ? "mzML" + : args.contains("--format 2") || args.contains("-f 2") + ? "mzML" + : args.contains("--format 3") || args.contains("-f 3") + ? "parquet" + : "mzML" + suffix = args.contains("--gzip") ? "${suffix}.gz" : "${suffix}" """ - ThermoRawFileParser.sh -i='${rawfile}' ${formatArg} ${args} -o=./ 2>&1 | tee '${rawfile.baseName}_conversion.log' - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ThermoRawFileParser: \$(ThermoRawFileParser.sh --version) - END_VERSIONS + ThermoRawFileParser.sh \\ + -i='${raw}' \\ + ${formatArg} ${args} \\ + -o=./ 2>&1 | tee '${prefix}_conversion.log' """ stub: - def prefix = task.ext.prefix ?: "${meta.mzml_id}" def args = task.ext.args ?: '' - // Determine output format from args, default to mzML - // Format 0 = MGF, formats 1-2 = mzML, format 3 = Parquet, format 4 = None - def outputExt = (args =~ /-f=0\b/).find() ? 'mgf' : 'mzML' + def prefix = task.ext.prefix ?: "${meta.mzml_id}" + def suffix = args.contains("--format 0") || args.contains("-f 0") + ? "mgf" + : args.contains("--format 1") || args.contains("-f 1") + ? "mzML" + : args.contains("--format 2") || args.contains("-f 2") + ? "mzML" + : args.contains("--format 3") || args.contains("-f 3") + ? "parquet" + : "mzML" + suffix = args.contains("--gzip") ? 
"${suffix}.gz" : "${suffix}" """ - touch '${prefix}.${outputExt}' + touch '${prefix}.${suffix}' touch '${prefix}_conversion.log' cat <<-END_VERSIONS > versions.yml diff --git a/modules/bigbio/thermorawfileparser/meta.yml b/modules/bigbio/thermorawfileparser/meta.yml index b6f99c9a..48b7d342 100644 --- a/modules/bigbio/thermorawfileparser/meta.yml +++ b/modules/bigbio/thermorawfileparser/meta.yml @@ -1,10 +1,12 @@ name: thermorawfileparser -description: Convert RAW file to mzML or MGF files +description: Convert RAW file to mzML or MGF files format keywords: - raw - - mzML - - MGF - - OpenMS + - mzml + - mgf + - parquet + - parser + - proteomics tools: - thermorawfileparser: description: | @@ -14,36 +16,66 @@ tools: - `-L` or `--msLevel=VALUE` to select MS levels (e.g., `-L=1,2` or `--msLevel=1-3`) homepage: https://github.com/compomics/ThermoRawFileParser documentation: https://github.com/compomics/ThermoRawFileParser + tool_dev_url: https://github.com/compomics/ThermoRawFileParser + doi: "10.1021/acs.jproteome.9b00328" + licence: + - "Apache Software" + identifier: biotools:ThermoRawFileParser input: - - meta: - type: map - description: | - Groovy Map containing sample information - - rawfile: - type: file - description: | - Thermo RAW file - pattern: "*.{raw,RAW}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - raw: + type: file + description: Thermo RAW file + pattern: "*.{raw,RAW}" + ontologies: [] output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'sample1', mzml_id:'UPS1_50amol_R3' ] - - convert_files: - type: file - description: | - Converted files in mzML or MGF format depending on the format parameter (-f). - Format options: 0 for MGF, 1 for mzML, 2 for indexed mzML (default), 3 for Parquet, 4 for None. 
- pattern: "*.{mzML,mgf,parquet}" - - log: - type: file - description: log file - pattern: "*.log" - - versions: - type: file - description: File containing software version - pattern: "versions.yml" + spectra: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.{mzML,mzML.gz,mgf,mgf.gz,parquet,parquet.gz}": + type: file + description: Mass spectra in open format + pattern: "*.{mzML,mzML.gz,mgf,mgf.gz,parquet,parquet.gz}" + ontologies: [] + versions_thermorawfileparser: + - - ${task.process}: + type: string + description: The process the versions were collected from + - thermorawfileparser: + type: string + description: The name of the tool + - ThermoRawFileParser.sh --version: + type: eval + description: The expression to obtain the version of the tool + log: + - "*.log": + type: file + description: Log file from the conversion process + pattern: "*.log" + ontologies: [] +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - thermorawfileparser: + type: string + description: The name of the tool + - ThermoRawFileParser.sh --version: + type: eval + description: The expression to obtain the version of the tool authors: + - "@jonasscheid" + - "@daichengxin" + - "@ypriverol" +maintainers: + - "@jonasscheid" - "@daichengxin" - "@ypriverol" diff --git a/modules/bigbio/thermorawfileparser/tests/main.nf.test b/modules/bigbio/thermorawfileparser/tests/main.nf.test index 355fbb15..c1f8ce95 100644 --- a/modules/bigbio/thermorawfileparser/tests/main.nf.test +++ b/modules/bigbio/thermorawfileparser/tests/main.nf.test @@ -22,8 +22,8 @@ nextflow_process { then { assert process.success - assert snapshot(process.out.versions).match("versions") - assert new File(process.out.convert_files[0][1]).name == 'TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.mzML' + assert 
snapshot(process.out.versions_thermorawfileparser).match("versions") + assert new File(process.out.spectra[0][1]).name == 'TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01.mzML' assert process.out.log.size() == 1 } } @@ -36,7 +36,7 @@ nextflow_process { process { """ input[0] = [ - [ id: 'test', mzml_id: 'test_sample' ], + [ id: 'test_sample', mzml_id: 'test_sample' ], file(params.test_data['proteomics']['msspectra']['ups1_50amol_r3'], checkIfExists: false) ] """ @@ -45,8 +45,9 @@ nextflow_process { then { assert process.success - assert snapshot(process.out.versions).match("versions_stub") - assert new File(process.out.convert_files[0][1]).name == 'test_sample.mzML' + assert snapshot(process.out.versions_thermorawfileparser).match("versions_stub") + assert new File(process.out.spectra[0][1]).name == 'test_sample.mzML' + assert snapshot(process.out).match() assert process.out.log.size() == 1 } } diff --git a/modules/bigbio/thermorawfileparser/tests/main.nf.test.snap b/modules/bigbio/thermorawfileparser/tests/main.nf.test.snap index 6562491e..f194dbb4 100644 --- a/modules/bigbio/thermorawfileparser/tests/main.nf.test.snap +++ b/modules/bigbio/thermorawfileparser/tests/main.nf.test.snap @@ -1,26 +1,34 @@ { - "versions": { - "content": [ - [ - "versions.yml:md5,dc9625538c025d615109ef8cac3a86ab" - ] - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.04.8" + "versions_stub": { + "content": [ + [ + [ + "THERMORAWFILEPARSER", + "thermorawfileparser", + "1.4.5" + ] + ] + ], + "timestamp": "2026-03-20T12:41:22.8183", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, - "timestamp": "2025-12-11T06:27:00.000000" - }, - "versions_stub": { - "content": [ - [ - "versions.yml:md5,dc9625538c025d615109ef8cac3a86ab" - ] - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.04.8" - }, - "timestamp": "2025-12-11T06:27:00.000000" - } -} + "versions": { + "content": [ + [ + [ + "THERMORAWFILEPARSER", + "thermorawfileparser", + "1.4.5" + ] + ] + ], + 
"timestamp": "2026-03-20T12:36:30.88531", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/modules/bigbio/thermorawfileparser/thermorawfileparser.diff b/modules/bigbio/thermorawfileparser/thermorawfileparser.diff new file mode 100644 index 00000000..05f21ecd --- /dev/null +++ b/modules/bigbio/thermorawfileparser/thermorawfileparser.diff @@ -0,0 +1,36 @@ +Changes in component 'bigbio/thermorawfileparser' +'modules/bigbio/thermorawfileparser/environment.yml' is unchanged +'modules/bigbio/thermorawfileparser/meta.yml' is unchanged +Changes in 'thermorawfileparser/main.nf': +--- modules/bigbio/thermorawfileparser/main.nf ++++ modules/bigbio/thermorawfileparser/main.nf +@@ -1,5 +1,5 @@ + process THERMORAWFILEPARSER { +- tag "${meta.id}" ++ tag "${meta.mzml_id}" + label 'process_low' + label 'process_single' + label 'error_retry' +@@ -27,7 +27,7 @@ + def hasFormatArg = (args =~ /(^|\s)(-f(=|\s)\d+|--format(=|\s)\d+)/).find() + // Default to indexed mzML format (-f=2) if not specified in args + def formatArg = hasFormatArg ? '' : '-f=2' +- def prefix = task.ext.prefix ?: "${meta.id}" ++ def prefix = task.ext.prefix ?: "${meta.mzml_id}" + def suffix = args.contains("--format 0") || args.contains("-f 0") + ? "mgf" + : args.contains("--format 1") || args.contains("-f 1") +@@ -48,7 +48,7 @@ + + stub: + def args = task.ext.args ?: '' +- def prefix = task.ext.prefix ?: "${meta.id}" ++ def prefix = task.ext.prefix ?: "${meta.mzml_id}" + def suffix = args.contains("--format 0") || args.contains("-f 0") + ? 
"mgf" + : args.contains("--format 1") || args.contains("-f 1") + +'modules/bigbio/thermorawfileparser/tests/main.nf.test.snap' is unchanged +'modules/bigbio/thermorawfileparser/tests/nextflow.config' is unchanged +'modules/bigbio/thermorawfileparser/tests/main.nf.test' is unchanged +************************************************************ diff --git a/modules/local/openms/false_discovery_rate/main.nf b/modules/local/openms/false_discovery_rate/main.nf index f0cf7e2e..37a872dd 100644 --- a/modules/local/openms/false_discovery_rate/main.nf +++ b/modules/local/openms/false_discovery_rate/main.nf @@ -28,6 +28,7 @@ process FALSE_DISCOVERY_RATE { -FDR:PSM ${params.run_fdr_cutoff} \\ -algorithm:add_decoy_peptides \\ -algorithm:add_decoy_proteins \\ + -algorithm:conservative ${params.fdr_conservative} \\ $args \\ 2>&1 | tee ${id_file.baseName}_fdr.log diff --git a/modules/local/openms/openms_peak_picker/main.nf b/modules/local/openms/openms_peak_picker/main.nf index 06d965e1..61999b17 100644 --- a/modules/local/openms/openms_peak_picker/main.nf +++ b/modules/local/openms/openms_peak_picker/main.nf @@ -19,7 +19,7 @@ process OPENMS_PEAK_PICKER { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.mzml_id}" - in_mem = params.peakpicking_inmemory ? "inmermory" : "lowmemory" + in_mem = params.peakpicking_inmemory ? "inmemory" : "lowmemory" lvls = params.peakpicking_ms_levels ? 
"-algorithm:ms_levels ${params.peakpicking_ms_levels}" : "" """ diff --git a/modules/local/openms/proteomicslfq/main.nf b/modules/local/openms/proteomicslfq/main.nf index de4004e8..35509bb5 100644 --- a/modules/local/openms/proteomicslfq/main.nf +++ b/modules/local/openms/proteomicslfq/main.nf @@ -48,6 +48,7 @@ process PROTEOMICSLFQ { ${feature_with_id_min_score} \\ ${feature_without_id_min_score} \\ -mass_recalibration ${params.mass_recalibration} \\ + -Seeding:algorithm ${params.lfq_seeding_algorithm} \\ -Seeding:intThreshold ${params.lfq_intensity_threshold} \\ -protein_quantification ${params.protein_quant} \\ -alignment_order ${params.alignment_order} \\ diff --git a/modules/local/utils/msrescore_features/main.nf b/modules/local/utils/msrescore_features/main.nf index c98a1655..e054cfad 100644 --- a/modules/local/utils/msrescore_features/main.nf +++ b/modules/local/utils/msrescore_features/main.nf @@ -3,8 +3,8 @@ process MSRESCORE_FEATURES { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.15' : - 'ghcr.io/bigbio/quantms-rescoring:0.0.15' }" + 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.16' : + 'ghcr.io/bigbio/quantms-rescoring:0.0.16' }" input: tuple val(meta), path(idxml), path(mzml), path(model_weight), val(search_engine) diff --git a/modules/local/utils/msrescore_fine_tuning/main.nf b/modules/local/utils/msrescore_fine_tuning/main.nf index c7d1d839..aa7ae59b 100644 --- a/modules/local/utils/msrescore_fine_tuning/main.nf +++ b/modules/local/utils/msrescore_fine_tuning/main.nf @@ -3,8 +3,8 @@ process MSRESCORE_FINE_TUNING { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.15' : - 'ghcr.io/bigbio/quantms-rescoring:0.0.15' }" + 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.16' : + 'ghcr.io/bigbio/quantms-rescoring:0.0.16' }" input: tuple val(meta), path(idxml), path(mzml), val(groupkey), path(ms2_model_dir) diff --git a/modules/local/utils/psm_clean/main.nf b/modules/local/utils/psm_clean/main.nf index e83b98ab..3417b0e6 100644 --- a/modules/local/utils/psm_clean/main.nf +++ b/modules/local/utils/psm_clean/main.nf @@ -3,8 +3,8 @@ process PSM_CLEAN { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.15' : - 'ghcr.io/bigbio/quantms-rescoring:0.0.15' }" + 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.16' : + 'ghcr.io/bigbio/quantms-rescoring:0.0.16' }" input: tuple val(meta), path(idxml), path(mzml) diff --git a/modules/local/utils/spectrum_features/main.nf b/modules/local/utils/spectrum_features/main.nf index 87dbb4ce..baaa989b 100644 --- a/modules/local/utils/spectrum_features/main.nf +++ b/modules/local/utils/spectrum_features/main.nf @@ -3,8 +3,8 @@ process SPECTRUM_FEATURES { label 'process_low' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.15' : - 'ghcr.io/bigbio/quantms-rescoring:0.0.15' }" + 'oras://ghcr.io/bigbio/quantms-rescoring-sif:0.0.16' : + 'ghcr.io/bigbio/quantms-rescoring:0.0.16' }" input: tuple val(meta), path(id_file), val(search_engine), path(ms_file) diff --git a/nextflow.config b/nextflow.config index bdd88169..7083a0a9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -31,6 +31,7 @@ params { run_fdr_cutoff = 0.10 protein_level_fdr_cutoff = 0.01 psm_level_fdr_cutoff = 0.01 + fdr_conservative = true // Use (D+1)/T formula (true) or (D+1)/(T+D) (false) for FDR estimation psm_clean = false // Debug level @@ -56,12 +57,12 @@ params { shuffle_max_attempts = 30 shuffle_sequence_identity_threshold = 0.5 - //// Peak picking if used + // Peak picking if used openms_peakpicking = false peakpicking_inmemory = false peakpicking_ms_levels = null // means all/auto - //// Conversion and mzml statistics flags + // Conversion and mzml statistics flags reindex_mzml = true mzml_statistics = false mzml_features = false @@ -184,6 +185,7 @@ params { protein_inference_method = 'aggregation' protein_quant = 'unique_peptides' quantification_method = 'feature_intensity' + lfq_seeding_algorithm = 'multiplex' // Feature seeding: 'multiplex' (default) or 'biosaur2' mass_recalibration = false alignment_order = 'star' quantify_decoys = false @@ -271,7 +273,7 @@ params { help_full = false show_hidden = false version = false - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/bigbio/quantms-test-datasets/' trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') // Config options diff --git a/nextflow_schema.json b/nextflow_schema.json index bb073f47..03521dbc 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -378,25 +378,25 @@ "type": "number", "description": "The minimum precursor m/z for the in silico library generation or 
library-free search", "fa_icon": "fas fa-filter", - "default": 400.0 + "default": 400 }, "max_pr_mz": { "type": "number", "description": "The maximum precursor m/z for the in silico library generation or library-free search", "fa_icon": "fas fa-filter", - "default": 2400.0 + "default": 2400 }, "min_fr_mz": { "type": "number", "description": "The minimum fragment m/z for the in silico library generation or library-free search", "fa_icon": "fas fa-filter", - "default": 100.0 + "default": 100 }, "max_fr_mz": { "type": "number", "description": "The maximum fragment m/z for the in silico library generation or library-free search", "fa_icon": "fas fa-filter", - "default": 1800.0 + "default": 1800 }, "db_debug": { "type": "integer", @@ -459,7 +459,6 @@ "onsite_add_decoys": { "type": "boolean", "description": "Add decoy modifications for validation", - "default": false, "fa_icon": "fas fa-shield-alt", "help_text": "When enabled, adds decoy modifications for validation. For AScore/PhosphoRS, adds --add-decoys flag. 
For LucXor, adds PhosphoDecoy(A) to target modifications.", "hidden": true @@ -501,14 +500,12 @@ "onsite_disable_split_by_charge": { "type": "boolean", "description": "Disable splitting PSMs by charge state for lucxor", - "default": false, "fa_icon": "fas fa-ban", "hidden": true }, "onsite_compute_all_scores": { "type": "boolean", "description": "Compute all scores for all candidate sites", - "default": false, "fa_icon": "fas fa-calculator", "hidden": true }, @@ -662,7 +659,6 @@ "type": "boolean", "description": "Force save fine-tuning model", "help_text": "When enabled, Force save fine-tuning model even if retrained model is not better than pretrained model", - "default": false, "fa_icon": "far fa-check-square" }, "epoch_to_train_ms2": { @@ -836,26 +832,26 @@ "min_precursor_intensity": { "type": "number", "description": "Minimum intensity of the precursor to be extracted", - "default": 1.0, + "default": 1, "fa_icon": "fas fa-sliders-h" }, "min_precursor_purity": { "type": "number", "description": "Minimum fraction of the total intensity. 0.0:1.0", - "default": 0.0, + "default": 0, "fa_icon": "fas fa-sliders-h", "help_text": "Minimum fraction of the total intensity in the isolation window of the precursor spectrum" }, "min_reporter_intensity": { "type": "number", "description": "Minimum intensity of the individual reporter ions to be extracted.", - "default": 0.0, + "default": 0, "fa_icon": "fas fa-sliders-h" }, "precursor_isotope_deviation": { "type": "number", "description": "Maximum allowed deviation (in ppm) between theoretical and observed isotopic peaks of the precursor peak", - "default": 10.0, + "default": 10, "fa_icon": "fas fa-sliders-h" }, "isotope_correction": { @@ -970,6 +966,13 @@ "fa_icon": "fas fa-filter", "help_text": "After applying protein-level FDR cutoff, this additionally filters PSMs to be used for quantification and reporting." 
}, + "fdr_conservative": { + "type": "boolean", + "description": "Use conservative FDR formula (D+1)/T instead of (D+1)/(T+D). Default: true", + "default": true, + "fa_icon": "fas fa-shield-alt", + "help_text": "When true, uses the conservative formula (D+1)/T for FDR estimation, which provides an upper bound on the true FDR. When false, uses (D+1)/(T+D) which gives a tighter estimate. See Keich & Noble (2025, Nature Methods) for details on FDR estimation methods." + }, "protein_inference_debug": { "type": "integer", "description": "Debug level for the protein inference step. Increase for verbose logging", @@ -1061,6 +1064,13 @@ "enum": ["feature_intensity", "spectral_counting"], "fa_icon": "fas fa-list-ol" }, + "lfq_seeding_algorithm": { + "type": "string", + "description": "Feature detection seeding algorithm for ProteomicsLFQ. 'multiplex' uses FeatureFinderMultiplex (default), 'biosaur2' uses the Biosaur2 algorithm.", + "default": "multiplex", + "enum": ["multiplex", "biosaur2"], + "fa_icon": "fas fa-seedling" + }, "mass_recalibration": { "type": "boolean", "description": "Recalibrates masses based on precursor mass deviations to correct for instrument biases. (default: 'false')", @@ -1089,7 +1099,7 @@ "lfq_intensity_threshold": { "type": "number", "description": "The minimum intensity for a feature to be considered for quantification. (default: '1000')", - "default": 1000.0, + "default": 1000, "fa_icon": "fas fa-filter", "help_text": "The minimum intensity for a feature to be considered for quantification. 
(default: '1000')" }, @@ -1509,7 +1519,7 @@ "type": "string", "fa_icon": "far fa-check-circle", "description": "Base URL or local path to location of pipeline test dataset files", - "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "default": "https://raw.githubusercontent.com/bigbio/quantms-test-datasets/", "hidden": true }, "trace_report_suffix": { diff --git a/subworkflows/local/file_preparation/main.nf b/subworkflows/local/file_preparation/main.nf index a0f77028..0212c6a0 100644 --- a/subworkflows/local/file_preparation/main.nf +++ b/subworkflows/local/file_preparation/main.nf @@ -85,8 +85,8 @@ workflow FILE_PREPARATION { // 'log': Path(*.txt)} // Where meta is the same as the input meta - ch_versions = ch_versions.mix(THERMORAWFILEPARSER.out.versions) - ch_results = ch_results.mix(THERMORAWFILEPARSER.out.convert_files) + // THERMORAWFILEPARSER emits its version on the 'versions' topic (collected in workflows/quantms.nf), so THERMORAWFILEPARSER.out.versions_thermorawfileparser is not mixed into ch_versions here + ch_results = ch_results.mix(THERMORAWFILEPARSER.out.spectra) ch_results.map{ it -> [it[0], it[1]] }.set{ indexed_mzml_bundle } diff --git a/tests/nextflow.config b/tests/nextflow.config index 07d8a65a..c8731282 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -7,8 +7,8 @@ // TODO nf-core: Specify any additional parameters here // Or any resources requirements params { - modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/quantms' + modules_testdata_base_path = 'https://raw.githubusercontent.com/bigbio/quantms-test-datasets/modules/data/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/bigbio/quantms-test-datasets/refs/heads/quantms' } aws.client.anonymous = true // fixes S3 access issues on self-hosted runners diff --git a/workflows/quantms.nf b/workflows/quantms.nf index d4ef36bf..8e634b26 100644 --- a/workflows/quantms.nf +++ b/workflows/quantms.nf
@@ -164,7 +164,26 @@ workflow QUANTMS { // Other subworkflow will return null when performing another subworkflow due to unknown reason. ch_versions = ch_versions.filter { v -> v != null } - softwareVersionsToYAML(ch_versions) + // see https://nf-co.re/docs/tutorials/migrate_to_topics/update_pipelines + // which is used in https://github.com/nf-core/demo/blob/1.1.0/workflows/demo.nf + def topic_versions = channel.topic("versions") + .distinct() + .branch { entry -> + versions_file: entry instanceof Path + versions_tuple: true + } + def topic_versions_string = topic_versions.versions_tuple + .map { process, tool, version -> + [process[process.lastIndexOf(':') + 1..-1], " ${tool}: ${version}"] + } + .groupTuple(by: 0) + .map { process, tool_versions -> + tool_versions.unique().sort() + "${process}:\n${tool_versions.join('\n')}" + } + + softwareVersionsToYAML(ch_versions.mix(topic_versions.versions_file)) + .mix(topic_versions_string) .collectFile( storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_' + 'quantms_software_' + 'mqc_' + 'versions.yml',