From be65b257cd17b3e642223d09da9486af91986a15 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Fri, 6 Feb 2026 13:07:18 -0500 Subject: [PATCH 1/5] refactor(diann): move q-value filtering to MSstatsClean for DIANN --- DESCRIPTION | 2 +- R/clean_DIANN.R | 55 ++++++++++++++++++++++++----- R/converters_DIANNtoMSstatsFormat.R | 33 ----------------- man/MSstatsClean.Rd | 19 ++++++++-- man/dot-cleanRawDIANN.Rd | 19 +++++++++- 5 files changed, 83 insertions(+), 45 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 81b3bb72..cb89a875 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,7 +14,7 @@ License: Artistic-2.0 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 biocViews: MassSpectrometry, Proteomics, Software, DataImport, QualityControl Depends: R (>= 4.0) diff --git a/R/clean_DIANN.R b/R/clean_DIANN.R index 2c8a46b7..96d49e63 100644 --- a/R/clean_DIANN.R +++ b/R/clean_DIANN.R @@ -1,14 +1,14 @@ #' Clean raw Diann files #' @param msstats_object an object of class `MSstatsDIANNFiles`. -#' @param MBR True if analysis was done with match between runs -#' @param quantificationColumn Use 'FragmentQuantCorrected'(default) column for quantified intensities for DIANN 1.8.x. -#' Use 'FragmentQuantRaw' for quantified intensities for DIANN 1.9.x. -#' Use 'auto' for quantified intensities for DIANN 2.x where each fragment intensity is a separate column, e.g. Fr0Quantity. +#' @inheritParams DIANNtoMSstatsFormat #' @return data.table #' @importFrom stats na.omit #' @keywords internal .cleanRawDIANN <- function(msstats_object, MBR = TRUE, - quantificationColumn = "FragmentQuantCorrected") { + quantificationColumn = "FragmentQuantCorrected", + global_qvalue_cutoff = 0.01, + qvalue_cutoff = 0.01, + pg_qvalue_cutoff = 0.01) { dn_input <- getInputFile(msstats_object, "input") dn_input <- data.table::as.data.table(dn_input) @@ -28,7 +28,10 @@ dn_input <- .cleanDIANNProcessFragmentInfo(dn_input, quantificationColumn) # Clean and filter data - dn_input <- .cleanDIANNCleanAndFilterData(dn_input, quantificationColumn) + dn_input <- .cleanDIANNCleanAndFilterData(dn_input, MBR, quantificationColumn, + global_qvalue_cutoff, + qvalue_cutoff, + pg_qvalue_cutoff) # Rename columns dn_input <- .cleanDIANNRenameColumns(dn_input, quantificationColumn) @@ -145,14 +148,50 @@ #' Clean and filter data by removing unwanted fragments and NA values #' @param dn_input data.table input -#' @param quantificationColumn quantification column name +#' @inheritParams DIANNtoMSstatsFormat #' @return cleaned data.table #' @noRd -.cleanDIANNCleanAndFilterData <- function(dn_input, quantificationColumn) { +.cleanDIANNCleanAndFilterData <- function(dn_input, MBR, quantificationColumn, + global_qvalue_cutoff, + qvalue_cutoff, + pg_qvalue_cutoff) { # Remove NH3 and H2O loss fragments & remove rows with NA in quant column dn_input <- dn_input[!grepl("NH3|H2O", FragmentIon) & !is.na(get(quantificationColumn))] + msg = paste0('** Filtering on Global Q Value < ', global_qvalue_cutoff) + getOption("MSstatsLog")("INFO", msg) + getOption("MSstatsMsg")("INFO", msg) + + dn_input = dn_input[QValue < global_qvalue_cutoff, ] + if (MBR) { + msg = '** MBR was used to analyze the data. Now setting names and filtering' + msg_1_mbr = paste0('-- LibPGQValue < ', pg_qvalue_cutoff) + msg_2_mbr = paste0('-- LibQValue < ', qvalue_cutoff) + dn_input = dn_input[LibPGQValue < pg_qvalue_cutoff, ] + dn_input = dn_input[LibQValue < qvalue_cutoff, ] + getOption("MSstatsLog")("INFO", msg) + getOption("MSstatsMsg")("INFO", msg) + getOption("MSstatsLog")("INFO", msg_1_mbr) + getOption("MSstatsMsg")("INFO", msg_1_mbr) + getOption("MSstatsLog")("INFO", msg_2_mbr) + getOption("MSstatsMsg")("INFO", msg_2_mbr) + # getOption("MSstatsLog")("INFO", "\n") + } else{ + msg = '** MBR was not used to analyze the data. Now setting names and filtering' + msg_1 = paste0('-- Filtering on GlobalPGQValue < ', pg_qvalue_cutoff) + msg_2 = paste0('-- Filtering on GlobalQValue < ', qvalue_cutoff) + dn_input = dn_input[GlobalPGQValue < pg_qvalue_cutoff, ] + dn_input = dn_input[GlobalQValue < qvalue_cutoff, ] + getOption("MSstatsLog")("INFO", msg) + getOption("MSstatsMsg")("INFO", msg) + getOption("MSstatsLog")("INFO", msg_1) + getOption("MSstatsMsg")("INFO", msg_1) + getOption("MSstatsLog")("INFO", msg_2) + getOption("MSstatsMsg")("INFO", msg_2) + # getOption("MSstatsLog")("INFO", "\n") + } + return(dn_input) } diff --git a/R/converters_DIANNtoMSstatsFormat.R b/R/converters_DIANNtoMSstatsFormat.R index 22d6223a..06fd4d64 100644 --- a/R/converters_DIANNtoMSstatsFormat.R +++ b/R/converters_DIANNtoMSstatsFormat.R @@ -83,39 +83,6 @@ DIANNtoMSstatsFormat = function(input, annotation = NULL, filter = removeOxidationMpeptides, drop_column = FALSE) - msg = paste0('** Filtering on Global Q Value < ', global_qvalue_cutoff) - getOption("MSstatsLog")("INFO", msg) - getOption("MSstatsMsg")("INFO", msg) - - input = input[DetectionQValue < global_qvalue_cutoff, ] - if (MBR) { - msg = '** MBR was used to analyze the data. Now setting names and filtering' - msg_1_mbr = paste0('-- LibPGQValue < ', pg_qvalue_cutoff) - msg_2_mbr = paste0('-- LibQValue < ', qvalue_cutoff) - input = input[LibPGQValue < pg_qvalue_cutoff, ] - input = input[LibQValue < qvalue_cutoff, ] - getOption("MSstatsLog")("INFO", msg) - getOption("MSstatsMsg")("INFO", msg) - getOption("MSstatsLog")("INFO", msg_1_mbr) - getOption("MSstatsMsg")("INFO", msg_1_mbr) - getOption("MSstatsLog")("INFO", msg_2_mbr) - getOption("MSstatsMsg")("INFO", msg_2_mbr) - # getOption("MSstatsLog")("INFO", "\n") - } else{ - msg = '** MBR was not used to analyze the data. Now setting names and filtering' - msg_1 = paste0('-- Filtering on GlobalPGQValue < ', pg_qvalue_cutoff) - msg_2 = paste0('-- Filtering on GlobalQValue < ', qvalue_cutoff) - input = input[GlobalPGQValue < pg_qvalue_cutoff, ] - input = input[GlobalQValue < qvalue_cutoff, ] - getOption("MSstatsLog")("INFO", msg) - getOption("MSstatsMsg")("INFO", msg) - getOption("MSstatsLog")("INFO", msg_1) - getOption("MSstatsMsg")("INFO", msg_1) - getOption("MSstatsLog")("INFO", msg_2) - getOption("MSstatsMsg")("INFO", msg_2) - # getOption("MSstatsLog")("INFO", "\n") - } - feature_columns = c("PeptideSequence", "PrecursorCharge", "FragmentIon", "ProductCharge") input = MSstatsConvert::MSstatsPreprocess( diff --git a/man/MSstatsClean.Rd b/man/MSstatsClean.Rd index 0e71ab7d..3e162ca1 100644 --- a/man/MSstatsClean.Rd +++ b/man/MSstatsClean.Rd @@ -66,7 +66,10 @@ MSstatsClean(msstats_object, ...) \S4method{MSstatsClean}{MSstatsDIANNFiles}( msstats_object, MBR = TRUE, - quantificationColumn = "FragmentQuantCorrected" + quantificationColumn = "FragmentQuantCorrected", + global_qvalue_cutoff = 0.01, + qvalue_cutoff = 0.01, + pg_qvalue_cutoff = 0.01 ) \S4method{MSstatsClean}{MSstatsMetamorpheusFiles}(msstats_object, MBR = TRUE, qvalue_cutoff = 0.05) @@ -129,7 +132,19 @@ removed based on the IsUnique column from Philosopher output} Use 'FragmentQuantRaw' for quantified intensities for DIANN 1.9.x. Use 'auto' for quantified intensities for DIANN 2.x where each fragment intensity is a separate column, e.g. Fr0Quantity.} -\item{qvalue_cutoff}{The q-value cutoff for filtering peaks detected by MBR} +\item{global_qvalue_cutoff}{The qvalue cutoff for the Q.Value column, i.e. +the run-specific precursor q-value. Default is 0.01.} + +\item{qvalue_cutoff}{If MBR is false, the qvalue cutoff for the Global.Q.Value +column, i.e. global precursor q-value. If MBR is true, the qvalue cutoff for the +Lib.Q.Value column, i.e. the q-value for the library created after the first MBR pass. +Default is 0.01.} + +\item{pg_qvalue_cutoff}{If MBR is false, the qvalue cutoff for the Global.PG.Q.Value +column, i.e. the global q-value for the protein group. If MBR is true, the +qvalue cutoff for the Lib.PG.Q.Value column, i.e. the protein group q-value for +the library created after the first MBR pass. Run should be the same as filename. +Default is 0.01.} } \value{ data.table diff --git a/man/dot-cleanRawDIANN.Rd b/man/dot-cleanRawDIANN.Rd index 118195e7..1c92f607 100644 --- a/man/dot-cleanRawDIANN.Rd +++ b/man/dot-cleanRawDIANN.Rd @@ -7,7 +7,10 @@ .cleanRawDIANN( msstats_object, MBR = TRUE, - quantificationColumn = "FragmentQuantCorrected" + quantificationColumn = "FragmentQuantCorrected", + global_qvalue_cutoff = 0.01, + qvalue_cutoff = 0.01, + pg_qvalue_cutoff = 0.01 ) } \arguments{ @@ -18,6 +21,20 @@ \item{quantificationColumn}{Use 'FragmentQuantCorrected'(default) column for quantified intensities for DIANN 1.8.x. Use 'FragmentQuantRaw' for quantified intensities for DIANN 1.9.x. Use 'auto' for quantified intensities for DIANN 2.x where each fragment intensity is a separate column, e.g. Fr0Quantity.} + +\item{global_qvalue_cutoff}{The qvalue cutoff for the Q.Value column, i.e. +the run-specific precursor q-value. Default is 0.01.} + +\item{qvalue_cutoff}{If MBR is false, the qvalue cutoff for the Global.Q.Value +column, i.e. global precursor q-value. If MBR is true, the qvalue cutoff for the +Lib.Q.Value column, i.e. the q-value for the library created after the first MBR pass. +Default is 0.01.} + +\item{pg_qvalue_cutoff}{If MBR is false, the qvalue cutoff for the Global.PG.Q.Value +column, i.e. the global q-value for the protein group. If MBR is true, the +qvalue cutoff for the Lib.PG.Q.Value column, i.e. the protein group q-value for +the library created after the first MBR pass. Run should be the same as filename. +Default is 0.01.} } \value{ data.table From 0429e8f24a8ac246e471cffd64c13401ccd4fa3d Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Fri, 6 Feb 2026 13:08:56 -0500 Subject: [PATCH 2/5] fix parameter passing --- R/converters_DIANNtoMSstatsFormat.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/converters_DIANNtoMSstatsFormat.R b/R/converters_DIANNtoMSstatsFormat.R index 06fd4d64..1cd8f526 100644 --- a/R/converters_DIANNtoMSstatsFormat.R +++ b/R/converters_DIANNtoMSstatsFormat.R @@ -71,7 +71,10 @@ DIANNtoMSstatsFormat = function(input, annotation = NULL, input = MSstatsConvert::MSstatsImport(list(input = input), "MSstats", "DIANN") input = MSstatsConvert::MSstatsClean(input, MBR = MBR, - quantificationColumn = quantificationColumn) + quantificationColumn = quantificationColumn, + global_qvalue_cutoff, + qvalue_cutoff, + pg_qvalue_cutoff) annotation = MSstatsConvert::MSstatsMakeAnnotation(input, annotation) decoy_filter = list(col_name = "ProteinName", From c3ca10476e761bbd4db5dedbdebd808a89fb1a51 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Fri, 6 Feb 2026 14:44:41 -0500 Subject: [PATCH 3/5] fix docs --- R/converters_DIANNtoMSstatsFormat.R | 9 ++++----- man/DIANNtoMSstatsFormat.Rd | 3 +-- man/MSstatsClean.Rd | 3 +-- man/dot-cleanRawDIANN.Rd | 3 +-- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/R/converters_DIANNtoMSstatsFormat.R b/R/converters_DIANNtoMSstatsFormat.R index 1cd8f526..3205aca9 100644 --- a/R/converters_DIANNtoMSstatsFormat.R +++ b/R/converters_DIANNtoMSstatsFormat.R @@ -14,8 +14,7 @@ #' @param pg_qvalue_cutoff If MBR is false, the qvalue cutoff for the Global.PG.Q.Value #' column, i.e. the global q-value for the protein group. If MBR is true, the #' qvalue cutoff for the Lib.PG.Q.Value column, i.e. the protein group q-value for -#' the library created after the first MBR pass. Run should be the same as filename. -#' Default is 0.01. +#' the library created after the first MBR pass. Default is 0.01. #' @param useUniquePeptide should unique pepties be removed #' @param removeFewMeasurements should proteins with few measurements be removed #' @param removeOxidationMpeptides should peptides with oxidation be removed @@ -72,9 +71,9 @@ DIANNtoMSstatsFormat = function(input, annotation = NULL, "MSstats", "DIANN") input = MSstatsConvert::MSstatsClean(input, MBR = MBR, quantificationColumn = quantificationColumn, - global_qvalue_cutoff, - qvalue_cutoff, - pg_qvalue_cutoff) + global_qvalue_cutoff = global_qvalue_cutoff, + qvalue_cutoff = qvalue_cutoff, + pg_qvalue_cutoff = pg_qvalue_cutoff) annotation = MSstatsConvert::MSstatsMakeAnnotation(input, annotation) decoy_filter = list(col_name = "ProteinName", diff --git a/man/DIANNtoMSstatsFormat.Rd b/man/DIANNtoMSstatsFormat.Rd index 269db0a3..76467174 100644 --- a/man/DIANNtoMSstatsFormat.Rd +++ b/man/DIANNtoMSstatsFormat.Rd @@ -40,8 +40,7 @@ Default is 0.01.} \item{pg_qvalue_cutoff}{If MBR is false, the qvalue cutoff for the Global.PG.Q.Value column, i.e. the global q-value for the protein group. If MBR is true, the qvalue cutoff for the Lib.PG.Q.Value column, i.e. the protein group q-value for -the library created after the first MBR pass. Run should be the same as filename. -Default is 0.01.} +the library created after the first MBR pass. Default is 0.01.} \item{useUniquePeptide}{should unique pepties be removed} diff --git a/man/MSstatsClean.Rd b/man/MSstatsClean.Rd index 3e162ca1..40702bfb 100644 --- a/man/MSstatsClean.Rd +++ b/man/MSstatsClean.Rd @@ -143,8 +143,7 @@ Default is 0.01.} \item{pg_qvalue_cutoff}{If MBR is false, the qvalue cutoff for the Global.PG.Q.Value column, i.e. the global q-value for the protein group. If MBR is true, the qvalue cutoff for the Lib.PG.Q.Value column, i.e. the protein group q-value for -the library created after the first MBR pass. Run should be the same as filename. -Default is 0.01.} +the library created after the first MBR pass. Default is 0.01.} } \value{ data.table diff --git a/man/dot-cleanRawDIANN.Rd b/man/dot-cleanRawDIANN.Rd index 1c92f607..6a61b2cc 100644 --- a/man/dot-cleanRawDIANN.Rd +++ b/man/dot-cleanRawDIANN.Rd @@ -33,8 +33,7 @@ Default is 0.01.} \item{pg_qvalue_cutoff}{If MBR is false, the qvalue cutoff for the Global.PG.Q.Value column, i.e. the global q-value for the protein group. If MBR is true, the qvalue cutoff for the Lib.PG.Q.Value column, i.e. the protein group q-value for -the library created after the first MBR pass. Run should be the same as filename. -Default is 0.01.} +the library created after the first MBR pass. Default is 0.01.} } \value{ data.table From ed2c5f530709e11d16ecb08f8bb816ce5a2edd7b Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Fri, 6 Feb 2026 14:45:39 -0500 Subject: [PATCH 4/5] fix logging --- R/clean_DIANN.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/clean_DIANN.R b/R/clean_DIANN.R index 96d49e63..afe75554 100644 --- a/R/clean_DIANN.R +++ b/R/clean_DIANN.R @@ -159,7 +159,7 @@ dn_input <- dn_input[!grepl("NH3|H2O", FragmentIon) & !is.na(get(quantificationColumn))] - msg = paste0('** Filtering on Global Q Value < ', global_qvalue_cutoff) + msg = paste0('** Filtering on Q.Value < ', global_qvalue_cutoff) getOption("MSstatsLog")("INFO", msg) getOption("MSstatsMsg")("INFO", msg) From 7d746052b2b36de2b76ed4913546e01565960b25 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Fri, 6 Feb 2026 15:03:37 -0500 Subject: [PATCH 5/5] add unit tests --- inst/tinytest/test_clean_DIANN.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/inst/tinytest/test_clean_DIANN.R b/inst/tinytest/test_clean_DIANN.R index e992480c..1f0077a6 100644 --- a/inst/tinytest/test_clean_DIANN.R +++ b/inst/tinytest/test_clean_DIANN.R @@ -25,3 +25,14 @@ output = MSstatsConvert:::.cleanRawDIANN(input) output = MSstatsConvert:::.cleanRawDIANN(input, quantificationColumn = "FragmentQuantRaw") .validateOutput(output) +# Q-value filtering +output = MSstatsConvert:::.cleanRawDIANN(input, global_qvalue_cutoff = 0.005) +expect_equal(sum(output$DetectionQValue < 0.005), nrow(output)) +output = MSstatsConvert:::.cleanRawDIANN(input, qvalue_cutoff = 0.00001) +expect_equal(sum(output$LibQValue < 0.00001), nrow(output)) +output = MSstatsConvert:::.cleanRawDIANN(input, pg_qvalue_cutoff = 0.001) +expect_equal(sum(output$LibPGQValue < 0.001), nrow(output)) +output = MSstatsConvert:::.cleanRawDIANN(input, MBR = TRUE, qvalue_cutoff = 0.005) +expect_equal(sum(output$LibQValue < 0.005), nrow(output)) +output = MSstatsConvert:::.cleanRawDIANN(input, MBR = TRUE, pg_qvalue_cutoff = 0.001) +expect_equal(sum(output$LibPGQValue < 0.001), nrow(output))