From 6424a034992b12eb12f5f8f2568c5ead82aaf9af Mon Sep 17 00:00:00 2001 From: tonywu1999 Date: Thu, 10 Jul 2025 15:05:41 -0700 Subject: [PATCH 1/4] fix(maxQuant): Remove unmodified peptides from PTM dataset --- R/converters.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/converters.R b/R/converters.R index 66b5874..98768fb 100644 --- a/R/converters.R +++ b/R/converters.R @@ -640,8 +640,6 @@ MaxQtoMSstatsPTMFormat = function(evidence=NULL, keep = 1)] } - MSstatsPTMformat = list('PTM' = msstatsptm_input) - if (!is.null(evidence_prot)){ annotation_protein = as.data.table(annotation_protein) @@ -659,7 +657,7 @@ MaxQtoMSstatsPTMFormat = function(evidence=NULL, MSstatsPTMformat = list('PTM' = msstatsptm_input, "PROTEIN" = msstats.abun) - + return(MSstatsPTMformat) } if (use_unmod_peptides){ @@ -668,6 +666,9 @@ MaxQtoMSstatsPTMFormat = function(evidence=NULL, MSstatsPTMformat = list(PTM = msstatsptm_input, PROTEIN = msstats.abun) + } else { + msstatsptm_input = msstatsptm_input[grepl(mod_id, msstatsptm_input$PeptideSequence),] + MSstatsPTMformat = list('PTM' = msstatsptm_input) } return(MSstatsPTMformat) From 55b5a499893f7f1b3043a7b211992618a83020c8 Mon Sep 17 00:00:00 2001 From: tonywu1999 Date: Thu, 10 Jul 2025 15:08:59 -0700 Subject: [PATCH 2/4] refactor such that unmodified are removed when global profiling dataset exists --- R/converters.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/converters.R b/R/converters.R index 98768fb..5f06062 100644 --- a/R/converters.R +++ b/R/converters.R @@ -655,6 +655,7 @@ MaxQtoMSstatsPTMFormat = function(evidence=NULL, proteinID = which_proteinid_protein) } + msstatsptm_input = msstatsptm_input[grepl(mod_id, msstatsptm_input$PeptideSequence),] MSstatsPTMformat = list('PTM' = msstatsptm_input, "PROTEIN" = msstats.abun) return(MSstatsPTMformat) From fa3658e241ecd8300c4e13190a4837fecf6739b8 Mon Sep 17 00:00:00 2001 From: tonywu1999 Date: Fri, 11 Jul 2025 17:35:08 -0700 Subject: [PATCH 3/4] add unit tests --- inst/tinytest/test_converters.R | 48 ++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/inst/tinytest/test_converters.R b/inst/tinytest/test_converters.R index d30af05..4ba6268 100644 --- a/inst/tinytest/test_converters.R +++ b/inst/tinytest/test_converters.R @@ -5,9 +5,13 @@ #' @author Anthony Wu #' #' @param msstats_ptm_input A list containing PTM and PROTEIN data tables -.validatePositiveNumberOfRows = function(msstats_ptm_input) { +.validatePositiveNumberOfRows = function(msstats_ptm_input, global_profiling = TRUE) { expect_true(nrow(msstats_ptm_input$PTM) > 0) - expect_true(nrow(msstats_ptm_input$PROTEIN) > 0) + if (global_profiling) { + expect_true(nrow(msstats_ptm_input$PROTEIN) > 0) + } else { + expect_true(is.null(msstats_ptm_input$PROTEIN)) + } } #' Validate protein ID count in converter output @@ -35,7 +39,7 @@ ) } -## MaxQ TMT +## MaxQ TMT useUnmodPeptides = TRUE data("maxq_tmt_evidence", package = "MSstatsPTM") data("maxq_tmt_annotation", package = "MSstatsPTM") @@ -84,12 +88,26 @@ mq_imported = MaxQtoMSstatsPTMFormat(evidence=maxq_tmt_evidence, .validateProteinId(mq_imported$PTM, "P29966_T150", 70) .validateProteinId(mq_imported$PTM, "P29966_T143_S145", 10) .validatePtmSubstring( - mq_imported$PTM, "Phospho \\(STY\\)", - length(mq_imported$PTM$PeptideSequence)) + mq_imported$PTM, "Phospho \\(STY\\)", + length(mq_imported$PTM$PeptideSequence)) .validatePtmSubstring( - mq_imported$PROTEIN, "Phospho \\(STY\\)", 0) + mq_imported$PROTEIN, "Phospho \\(STY\\)", 0) + +## MaxQ TMT useUnmodPeptides = FALSE +mq_imported = MaxQtoMSstatsPTMFormat(evidence=maxq_tmt_evidence, + annotation=maxq_tmt_annotation, + fasta=system.file("extdata", "maxq_tmt_fasta.fasta", package="MSstatsPTM"), + fasta_protein_name="uniprot_ac", + use_unmod_peptides=FALSE, + labeling_type = "TMT") +.validatePositiveNumberOfRows(mq_imported, global_profiling = FALSE) +.validateProteinId(mq_imported$PTM, "P29966_T150", 70) +.validateProteinId(mq_imported$PTM, "P29966_T143_S145", 10) +.validatePtmSubstring( + mq_imported$PTM, "Phospho \\(STY\\)", + length(mq_imported$PTM$PeptideSequence)) -## MaxQ LF +## MaxQ LF useUnmodPeptides = TRUE data("maxq_lf_evidence", package = "MSstatsPTM") data("maxq_lf_annotation", package = "MSstatsPTM") @@ -155,6 +173,22 @@ mq_imported = MaxQtoMSstatsPTMFormat(evidence=maxq_lf_evidence, .validatePtmSubstring( mq_imported$PROTEIN, "Phospho \\(STY\\)", 0) +## MaxQ LF useUnmodPeptides = FALSE +mq_imported = MaxQtoMSstatsPTMFormat(evidence=maxq_lf_evidence, + annotation=maxq_lf_annotation, + fasta=system.file("extdata", "maxq_lf_fasta.fasta", package="MSstatsPTM"), + fasta_protein_name="uniprot_ac", + mod_id="\\(Phospho \\(STY\\)\\)", + use_unmod_peptides=FALSE, + labeling_type = "LF", + which_proteinid_ptm = "Proteins") +.validatePositiveNumberOfRows(mq_imported, global_profiling = FALSE) +.validateProteinId(mq_imported$PTM, "P36578_S295", 66) +.validateProteinId(mq_imported$PTM, "Q13523_S431_S437", 33) +.validatePtmSubstring( + mq_imported$PTM, "Phospho \\(STY\\)", + length(mq_imported$PTM$PeptideSequence)) + ## Spectronaut data("spectronaut_input", package = "MSstatsPTM") data("spectronaut_annotation", package = "MSstatsPTM") From f7b3b2fbe88e1c788fe09a0c45261a0d635dea90 Mon Sep 17 00:00:00 2001 From: tonywu1999 Date: Fri, 11 Jul 2025 17:47:44 -0700 Subject: [PATCH 4/4] update unit test explanation --- inst/tinytest/test_converters.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inst/tinytest/test_converters.R b/inst/tinytest/test_converters.R index 4ba6268..6fb9d23 100644 --- a/inst/tinytest/test_converters.R +++ b/inst/tinytest/test_converters.R @@ -5,6 +5,8 @@ #' @author Anthony Wu #' #' @param msstats_ptm_input A list containing PTM and PROTEIN data tables +#' @param global_profiling Default TRUE indicates msstats_ptm_input should have +#' the PROTEIN element .validatePositiveNumberOfRows = function(msstats_ptm_input, global_profiling = TRUE) { expect_true(nrow(msstats_ptm_input$PTM) > 0) if (global_profiling) {