diff --git a/R/clean_Spectronaut.R b/R/clean_Spectronaut.R index 0eeac5d1..90f45da9 100644 --- a/R/clean_Spectronaut.R +++ b/R/clean_Spectronaut.R @@ -1,8 +1,6 @@ #' Clean raw Spectronaut output. #' @param msstats_object an object of class `MSstatsSpectronautFiles`. -#' @param intensity chr, specifies which column will be used for Intensity. -#' @param calculateAnomalyScores logical, whether to calculate anomaly scores -#' @param anomalyModelFeatures character vector, specifies which columns will be used for anomaly detection model. Can be NULL if calculateAnomalyScores=FALSE. +#' @inheritParams SpectronauttoMSstatsFormat #' @return `data.table` #' @keywords internal .cleanRawSpectronaut = function(msstats_object, intensity, @@ -20,10 +18,17 @@ colnames(spec_input)) exclude_col = .findAvailable(c("FExcludedFromQuantification"), colnames(spec_input)) + intensity_column_mapping = c( + "PeakArea" = "FPeakArea", + "NormalizedPeakArea" = "FNormalizedPeakArea", + "MS1Quantity" = "FGMS1Quantity" + ) + intensity = match.arg(intensity, names(intensity_column_mapping)) + intensity_column = intensity_column_mapping[[intensity]] cols = c("PGProteinGroups", "EGModifiedSequence", "FGCharge", "FFrgIon", f_charge_col, "RFileName", "RCondition", "RReplicate", "EGQvalue", pg_qval_col, interference_col, exclude_col, - paste0("F", intensity)) + intensity_column) if (calculateAnomalyScores){ cols = c(cols, anomalyModelFeatures) } @@ -32,7 +37,7 @@ data.table::setnames( spec_input, c("PGProteinGroups", "EGModifiedSequence", "FGCharge", "FFrgIon", - f_charge_col, "RFileName", paste0("F", intensity), + f_charge_col, "RFileName", intensity_column, "RCondition", "RReplicate"), c("ProteinName", "PeptideSequence", "PrecursorCharge", "FragmentIon", "ProductCharge", "Run", "Intensity", "Condition", "BioReplicate"), diff --git a/R/converters_SpectronauttoMSstatsFormat.R b/R/converters_SpectronauttoMSstatsFormat.R index 487a86c7..bf53f1cc 100644 --- a/R/converters_SpectronauttoMSstatsFormat.R +++ b/R/converters_SpectronauttoMSstatsFormat.R @@ -2,7 +2,8 @@ #' #' @param input name of Spectronaut output, which is long-format. ProteinName, PeptideSequence, PrecursorCharge, FragmentIon, ProductCharge, IsotopeLabelType, Condition, BioReplicate, Run, Intensity, F.ExcludedFromQuantification are required. Rows with F.ExcludedFromQuantification=True will be removed. #' @param annotation name of 'annotation.txt' data which includes Condition, BioReplicate, Run. If annotation is already complete in Spectronaut, use annotation=NULL (default). It will use the annotation information from input. -#' @param intensity 'PeakArea'(default) uses not normalized peak area. 'NormalizedPeakArea' uses peak area normalized by Spectronaut. +#' @param intensity 'PeakArea'(default) uses not normalized MS2 peak area. 'NormalizedPeakArea' uses MS2 peak area normalized by Spectronaut. +#' 'MS1Quantity' uses MS1 level quantification, which should be used if MS2 is unreliable. #' @param excludedFromQuantificationFilter Remove rows with F.ExcludedFromQuantification=TRUE Default is TRUE. #' @param filter_with_Qvalue FALSE(default) will not perform any filtering. TRUE will filter out the intensities that have greater than qvalue_cutoff in EG.Qvalue column. Those intensities will be replaced with zero and will be considered as censored missing values for imputation purpose. #' @param qvalue_cutoff Cutoff for EG.Qvalue. default is 0.01. @@ -32,7 +33,8 @@ #' head(spectronaut_imported) #' SpectronauttoMSstatsFormat = function( - input, annotation = NULL, intensity = 'PeakArea', + input, annotation = NULL, + intensity = c('PeakArea', 'NormalizedPeakArea', 'MS1Quantity'), excludedFromQuantificationFilter = TRUE, filter_with_Qvalue = FALSE, qvalue_cutoff = 0.01, useUniquePeptide = TRUE, removeFewMeasurements=TRUE, diff --git a/R/utils_checks.R b/R/utils_checks.R index 7d1458e3..7884903e 100644 --- a/R/utils_checks.R +++ b/R/utils_checks.R @@ -152,11 +152,6 @@ } } - # Intensity validation (if provided) - if (!is.null(config$intensity)) { - checkmate::assertString(config$intensity) - } - # Q-value filtering parameters checkmate::assertLogical(config$filter_with_Qvalue, len = 1) checkmate::assertNumber(config$qvalue_cutoff, lower = 0, upper = 1) diff --git a/inst/tinytest/test_clean_Spectronaut.R b/inst/tinytest/test_clean_Spectronaut.R new file mode 100644 index 00000000..47d7b429 --- /dev/null +++ b/inst/tinytest/test_clean_Spectronaut.R @@ -0,0 +1,16 @@ +# Test intensity parameter +spectronaut_raw = system.file("tinytest/raw_data/Spectronaut/spectronaut_input.csv", + package = "MSstatsConvert") +spectronaut_raw = data.table::fread(spectronaut_raw) +spectronaut_raw$FG.MS1Quantity = 100000 +msstats_input = MSstatsConvert::MSstatsImport( + list(input = spectronaut_raw), "MSstats", "Spectronaut") +output = MSstatsConvert:::.cleanRawSpectronaut(msstats_input, intensity = 'MS1Quantity', + calculateAnomalyScores = FALSE, + anomalyModelFeatures = c()) +expect_true(all(output$Intensity == 100000)) + +expect_error(MSstatsConvert:::.cleanRawSpectronaut(msstats_input, intensity = 'invalid', + calculateAnomalyScores = FALSE, + anomalyModelFeatures = c()), + pattern = "'arg' should be one of .*PeakArea") diff --git a/man/MSstatsClean.Rd b/man/MSstatsClean.Rd index 40702bfb..c5338a47 100644 --- a/man/MSstatsClean.Rd +++ b/man/MSstatsClean.Rd @@ -113,11 +113,12 @@ in TMT data.} Defaults to "Abundance", which means that columns that contain the word "Abundance" will be treated as corresponding to intensities for different channels.} -\item{intensity}{chr, specifies which column will be used for Intensity.} +\item{intensity}{'PeakArea'(default) uses not normalized MS2 peak area. 'NormalizedPeakArea' uses MS2 peak area normalized by Spectronaut. +'MS1Quantity' uses MS1 level quantification, which should be used if MS2 is unreliable.} -\item{calculateAnomalyScores}{logical, whether to calculate anomaly scores} +\item{calculateAnomalyScores}{Default is FALSE. If TRUE, will run anomaly detection model and calculate anomaly scores for each feature. Used downstream to weigh measurements in differential analysis.} -\item{anomalyModelFeatures}{character vector, specifies which columns will be used for anomaly detection model. Can be NULL if calculateAnomalyScores=FALSE.} +\item{anomalyModelFeatures}{character vector of quality metric column names to be used as features in the anomaly detection model. List must not be empty if calculateAnomalyScores=TRUE.} \item{peptide_id_col}{character name of a column that identifies peptides} diff --git a/man/SpectronauttoMSstatsFormat.Rd b/man/SpectronauttoMSstatsFormat.Rd index 7e93e486..30956a78 100644 --- a/man/SpectronauttoMSstatsFormat.Rd +++ b/man/SpectronauttoMSstatsFormat.Rd @@ -36,7 +36,8 @@ SpectronauttoMSstatsFormat( \item{annotation}{name of 'annotation.txt' data which includes Condition, BioReplicate, Run. If annotation is already complete in Spectronaut, use annotation=NULL (default). It will use the annotation information from input.} -\item{intensity}{'PeakArea'(default) uses not normalized peak area. 'NormalizedPeakArea' uses peak area normalized by Spectronaut.} +\item{intensity}{'PeakArea'(default) uses not normalized MS2 peak area. 'NormalizedPeakArea' uses MS2 peak area normalized by Spectronaut. +'MS1Quantity' uses MS1 level quantification, which should be used if MS2 is unreliable.} \item{excludedFromQuantificationFilter}{Remove rows with F.ExcludedFromQuantification=TRUE Default is TRUE.} diff --git a/man/dot-cleanRawSpectronaut.Rd b/man/dot-cleanRawSpectronaut.Rd index 8ec72cce..70d2a600 100644 --- a/man/dot-cleanRawSpectronaut.Rd +++ b/man/dot-cleanRawSpectronaut.Rd @@ -14,11 +14,12 @@ \arguments{ \item{msstats_object}{an object of class \code{MSstatsSpectronautFiles}.} -\item{intensity}{chr, specifies which column will be used for Intensity.} +\item{intensity}{'PeakArea'(default) uses not normalized MS2 peak area. 'NormalizedPeakArea' uses MS2 peak area normalized by Spectronaut. +'MS1Quantity' uses MS1 level quantification, which should be used if MS2 is unreliable.} -\item{calculateAnomalyScores}{logical, whether to calculate anomaly scores} +\item{calculateAnomalyScores}{Default is FALSE. If TRUE, will run anomaly detection model and calculate anomaly scores for each feature. Used downstream to weigh measurements in differential analysis.} -\item{anomalyModelFeatures}{character vector, specifies which columns will be used for anomaly detection model. Can be NULL if calculateAnomalyScores=FALSE.} +\item{anomalyModelFeatures}{character vector of quality metric column names to be used as features in the anomaly detection model. List must not be empty if calculateAnomalyScores=TRUE.} } \value{ \code{data.table}