From 4058cc56725a7502fd7ee2dbc0ee992c62a293d7 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Sat, 7 Feb 2026 22:09:19 -0500 Subject: [PATCH 1/6] Add option to use MS1Quantity for quantifications for Spectronaut --- R/clean_Spectronaut.R | 14 +++++++++----- R/converters_SpectronauttoMSstatsFormat.R | 3 ++- man/MSstatsClean.Rd | 7 ++++--- man/SpectronauttoMSstatsFormat.Rd | 3 ++- man/dot-cleanRawSpectronaut.Rd | 7 ++++--- 5 files changed, 21 insertions(+), 13 deletions(-) diff --git a/R/clean_Spectronaut.R b/R/clean_Spectronaut.R index 0eeac5d1..b1333fe8 100644 --- a/R/clean_Spectronaut.R +++ b/R/clean_Spectronaut.R @@ -1,8 +1,6 @@ #' Clean raw Spectronaut output. #' @param msstats_object an object of class `MSstatsSpectronautFiles`. -#' @param intensity chr, specifies which column will be used for Intensity. -#' @param calculateAnomalyScores logical, whether to calculate anomaly scores -#' @param anomalyModelFeatures character vector, specifies which columns will be used for anomaly detection model. Can be NULL if calculateAnomalyScores=FALSE. 
+#' @inheritParams SpectronauttoMSstatsFormat #' @return `data.table` #' @keywords internal .cleanRawSpectronaut = function(msstats_object, intensity, @@ -20,10 +18,16 @@ colnames(spec_input)) exclude_col = .findAvailable(c("FExcludedFromQuantification"), colnames(spec_input)) + intensity_column_mapping = c( + "PeakArea" = "FPeakArea", + "NormalizedPeakArea" = "FNormalizedPeakArea", + "MS1Quantity" = "FGMS1Quantity" + ) + intensity_column = intensity_column_mapping[[intensity]] cols = c("PGProteinGroups", "EGModifiedSequence", "FGCharge", "FFrgIon", f_charge_col, "RFileName", "RCondition", "RReplicate", "EGQvalue", pg_qval_col, interference_col, exclude_col, - paste0("F", intensity)) + intensity_column) if (calculateAnomalyScores){ cols = c(cols, anomalyModelFeatures) } @@ -32,7 +36,7 @@ data.table::setnames( spec_input, c("PGProteinGroups", "EGModifiedSequence", "FGCharge", "FFrgIon", - f_charge_col, "RFileName", paste0("F", intensity), + f_charge_col, "RFileName", intensity_column, "RCondition", "RReplicate"), c("ProteinName", "PeptideSequence", "PrecursorCharge", "FragmentIon", "ProductCharge", "Run", "Intensity", "Condition", "BioReplicate"), diff --git a/R/converters_SpectronauttoMSstatsFormat.R b/R/converters_SpectronauttoMSstatsFormat.R index 487a86c7..f3f0baa9 100644 --- a/R/converters_SpectronauttoMSstatsFormat.R +++ b/R/converters_SpectronauttoMSstatsFormat.R @@ -2,7 +2,8 @@ #' #' @param input name of Spectronaut output, which is long-format. ProteinName, PeptideSequence, PrecursorCharge, FragmentIon, ProductCharge, IsotopeLabelType, Condition, BioReplicate, Run, Intensity, F.ExcludedFromQuantification are required. Rows with F.ExcludedFromQuantification=True will be removed. #' @param annotation name of 'annotation.txt' data which includes Condition, BioReplicate, Run. If annotation is already complete in Spectronaut, use annotation=NULL (default). It will use the annotation information from input. 
-#' @param intensity 'PeakArea'(default) uses not normalized peak area. 'NormalizedPeakArea' uses peak area normalized by Spectronaut. +#' @param intensity 'PeakArea'(default) uses non-normalized MS2 peak area. 'NormalizedPeakArea' uses MS2 peak area normalized by Spectronaut. +#' 'MS1Quantity' uses MS1-level quantification, which should be used if MS2 is unreliable. #' @param excludedFromQuantificationFilter Remove rows with F.ExcludedFromQuantification=TRUE Default is TRUE. #' @param filter_with_Qvalue FALSE(default) will not perform any filtering. TRUE will filter out the intensities that have greater than qvalue_cutoff in EG.Qvalue column. Those intensities will be replaced with zero and will be considered as censored missing values for imputation purpose. #' @param qvalue_cutoff Cutoff for EG.Qvalue. default is 0.01. diff --git a/man/MSstatsClean.Rd b/man/MSstatsClean.Rd index 40702bfb..c5338a47 100644 --- a/man/MSstatsClean.Rd +++ b/man/MSstatsClean.Rd @@ -113,11 +113,12 @@ in TMT data.} Defaults to "Abundance", which means that columns that contain the word "Abundance" will be treated as corresponding to intensities for different channels.} -\item{intensity}{chr, specifies which column will be used for Intensity.} +\item{intensity}{'PeakArea'(default) uses non-normalized MS2 peak area. 'NormalizedPeakArea' uses MS2 peak area normalized by Spectronaut. +'MS1Quantity' uses MS1-level quantification, which should be used if MS2 is unreliable.} -\item{calculateAnomalyScores}{logical, whether to calculate anomaly scores} +\item{calculateAnomalyScores}{Default is FALSE. If TRUE, will run anomaly detection model and calculate anomaly scores for each feature. Used downstream to weigh measurements in differential analysis.} -\item{anomalyModelFeatures}{character vector, specifies which columns will be used for anomaly detection model. 
Can be NULL if calculateAnomalyScores=FALSE.} +\item{anomalyModelFeatures}{character vector of quality metric column names to be used as features in the anomaly detection model. List must not be empty if calculateAnomalyScores=TRUE.} \item{peptide_id_col}{character name of a column that identifies peptides} diff --git a/man/SpectronauttoMSstatsFormat.Rd b/man/SpectronauttoMSstatsFormat.Rd index 7e93e486..30956a78 100644 --- a/man/SpectronauttoMSstatsFormat.Rd +++ b/man/SpectronauttoMSstatsFormat.Rd @@ -36,7 +36,8 @@ SpectronauttoMSstatsFormat( \item{annotation}{name of 'annotation.txt' data which includes Condition, BioReplicate, Run. If annotation is already complete in Spectronaut, use annotation=NULL (default). It will use the annotation information from input.} -\item{intensity}{'PeakArea'(default) uses not normalized peak area. 'NormalizedPeakArea' uses peak area normalized by Spectronaut.} +\item{intensity}{'PeakArea'(default) uses non-normalized MS2 peak area. 'NormalizedPeakArea' uses MS2 peak area normalized by Spectronaut. +'MS1Quantity' uses MS1-level quantification, which should be used if MS2 is unreliable.} \item{excludedFromQuantificationFilter}{Remove rows with F.ExcludedFromQuantification=TRUE Default is TRUE.} diff --git a/man/dot-cleanRawSpectronaut.Rd b/man/dot-cleanRawSpectronaut.Rd index 8ec72cce..70d2a600 100644 --- a/man/dot-cleanRawSpectronaut.Rd +++ b/man/dot-cleanRawSpectronaut.Rd @@ -14,11 +14,12 @@ \arguments{ \item{msstats_object}{an object of class \code{MSstatsSpectronautFiles}.} -\item{intensity}{chr, specifies which column will be used for Intensity.} +\item{intensity}{'PeakArea'(default) uses non-normalized MS2 peak area. 'NormalizedPeakArea' uses MS2 peak area normalized by Spectronaut. +'MS1Quantity' uses MS1-level quantification, which should be used if MS2 is unreliable.} -\item{calculateAnomalyScores}{logical, whether to calculate anomaly scores} +\item{calculateAnomalyScores}{Default is FALSE. 
If TRUE, will run anomaly detection model and calculate anomaly scores for each feature. Used downstream to weigh measurements in differential analysis.} -\item{anomalyModelFeatures}{character vector, specifies which columns will be used for anomaly detection model. Can be NULL if calculateAnomalyScores=FALSE.} +\item{anomalyModelFeatures}{character vector of quality metric column names to be used as features in the anomaly detection model. List must not be empty if calculateAnomalyScores=TRUE.} } \value{ \code{data.table} From 3fb4464fc12140ddc611ba6fd04e3ecefc19cdd9 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 10 Feb 2026 13:33:01 -0500 Subject: [PATCH 2/6] enable ms1 quantity tests --- inst/tinytest/test_clean_Spectronaut.R | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 inst/tinytest/test_clean_Spectronaut.R diff --git a/inst/tinytest/test_clean_Spectronaut.R b/inst/tinytest/test_clean_Spectronaut.R new file mode 100644 index 00000000..8076785c --- /dev/null +++ b/inst/tinytest/test_clean_Spectronaut.R @@ -0,0 +1,11 @@ +# Test MS1Quantity value is used for intensity. 
+spectronaut_raw = system.file("tinytest/raw_data/Spectronaut/spectronaut_input.csv", + package = "MSstatsConvert") +spectronaut_raw = data.table::fread(spectronaut_raw) +spectronaut_raw$FG.MS1Quantity = 100000 +msstats_input = MSstatsConvert::MSstatsImport( + list(input = spectronaut_raw), "MSstats", "Spectronaut") +output = MSstatsConvert:::.cleanRawSpectronaut(msstats_input, intensity = 'MS1Quantity', + calculateAnomalyScores = FALSE, + anomalyModelFeatures = c()) +expect_true(all(output$Intensity == 100000)) From 262b9985c484c59854c12ee8de4807202ab1403e Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 10 Feb 2026 13:45:36 -0500 Subject: [PATCH 3/6] add tests and validation --- R/clean_Spectronaut.R | 1 + inst/tinytest/test_clean_Spectronaut.R | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/R/clean_Spectronaut.R b/R/clean_Spectronaut.R index b1333fe8..90f45da9 100644 --- a/R/clean_Spectronaut.R +++ b/R/clean_Spectronaut.R @@ -23,6 +23,7 @@ "NormalizedPeakArea" = "FNormalizedPeakArea", "MS1Quantity" = "FGMS1Quantity" ) + intensity = match.arg(intensity, names(intensity_column_mapping)) intensity_column = intensity_column_mapping[[intensity]] cols = c("PGProteinGroups", "EGModifiedSequence", "FGCharge", "FFrgIon", f_charge_col, "RFileName", "RCondition", "RReplicate", diff --git a/inst/tinytest/test_clean_Spectronaut.R b/inst/tinytest/test_clean_Spectronaut.R index 8076785c..a5586cf7 100644 --- a/inst/tinytest/test_clean_Spectronaut.R +++ b/inst/tinytest/test_clean_Spectronaut.R @@ -1,4 +1,4 @@ -# Test MS1Quantity value is used for intensity. 
+# Test intensity parameter spectronaut_raw = system.file("tinytest/raw_data/Spectronaut/spectronaut_input.csv", package = "MSstatsConvert") spectronaut_raw = data.table::fread(spectronaut_raw) @@ -9,3 +9,8 @@ output = MSstatsConvert:::.cleanRawSpectronaut(msstats_input, intensity = 'MS1Qu calculateAnomalyScores = FALSE, anomalyModelFeatures = c()) expect_true(all(output$Intensity == 100000)) + +expect_error(MSstatsConvert:::.cleanRawSpectronaut(msstats_input, intensity = 'invalid', + calculateAnomalyScores = FALSE, + anomalyModelFeatures = c()), + pattern = "'arg' should be one of “PeakArea”") From 9d3e362d5b6584fa4e03ed3bb4edc0c2513735f4 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 10 Feb 2026 13:46:45 -0500 Subject: [PATCH 4/6] use match.arg naming convention --- R/converters_SpectronauttoMSstatsFormat.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/converters_SpectronauttoMSstatsFormat.R b/R/converters_SpectronauttoMSstatsFormat.R index f3f0baa9..bf53f1cc 100644 --- a/R/converters_SpectronauttoMSstatsFormat.R +++ b/R/converters_SpectronauttoMSstatsFormat.R @@ -33,7 +33,8 @@ #' head(spectronaut_imported) #' SpectronauttoMSstatsFormat = function( - input, annotation = NULL, intensity = 'PeakArea', + input, annotation = NULL, + intensity = c('PeakArea', 'NormalizedPeakArea', 'MS1Quantity'), excludedFromQuantificationFilter = TRUE, filter_with_Qvalue = FALSE, qvalue_cutoff = 0.01, useUniquePeptide = TRUE, removeFewMeasurements=TRUE, From c0e188d70d19171442d61c6c00cc53fda062c553 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 10 Feb 2026 13:57:46 -0500 Subject: [PATCH 5/6] remove validation for intensity in the validation function. 
--- R/utils_checks.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/R/utils_checks.R b/R/utils_checks.R index 7d1458e3..7884903e 100644 --- a/R/utils_checks.R +++ b/R/utils_checks.R @@ -152,11 +152,6 @@ } } - # Intensity validation (if provided) - if (!is.null(config$intensity)) { - checkmate::assertString(config$intensity) - } - # Q-value filtering parameters checkmate::assertLogical(config$filter_with_Qvalue, len = 1) checkmate::assertNumber(config$qvalue_cutoff, lower = 0, upper = 1) From 908e6af180e63e8951cebcc5fbd61e4c8d7f342f Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 10 Feb 2026 14:04:18 -0500 Subject: [PATCH 6/6] fix unit test --- inst/tinytest/test_clean_Spectronaut.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tinytest/test_clean_Spectronaut.R b/inst/tinytest/test_clean_Spectronaut.R index a5586cf7..47d7b429 100644 --- a/inst/tinytest/test_clean_Spectronaut.R +++ b/inst/tinytest/test_clean_Spectronaut.R @@ -13,4 +13,4 @@ expect_true(all(output$Intensity == 100000)) expect_error(MSstatsConvert:::.cleanRawSpectronaut(msstats_input, intensity = 'invalid', calculateAnomalyScores = FALSE, anomalyModelFeatures = c()), - pattern = "'arg' should be one of “PeakArea”") + pattern = "'arg' should be one of .*PeakArea")