Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 24 additions & 25 deletions R/utils_normalize.R
Original file line number Diff line number Diff line change
Expand Up @@ -192,32 +192,31 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s
.normalizeGlobalStandards = function(input, peptides_dict, standards) {
PeptideSequence = PEPTIDE = PROTEIN = median_by_fraction = NULL
Standard = FRACTION = LABEL = ABUNDANCE = RUN = GROUP = NULL

proteins = as.character(unique(input$PROTEIN))
means_by_standard = unique(input[, list(RUN)])
for (standard_id in seq_along(standards)) {
peptide_name = unlist(peptides_dict[PeptideSequence == standards[standard_id],
as.character(PEPTIDE)], FALSE, FALSE)
if (length(peptide_name) > 0) {
standard = input[PEPTIDE == peptide_name, ]
} else {
if (standards[standard_id] %in% proteins) {
standard = input[PROTEIN == standards[standard_id], ]
} else {
msg = paste("global standard peptides or proteins, ",
standards[standard_id],", is not in dataset.",
"Please check whether 'nameStandards' input is correct or not.")
getOption("MSstatsLog")("ERROR", msg)
stop(msg)
}
}
mean_by_run = standard[GROUP != "0" & !is.na(ABUNDANCE),
list(mean_abundance = mean(ABUNDANCE, na.rm = TRUE)),
by = "RUN"]
colnames(mean_by_run)[2] = paste0("meanStandard", standard_id)
means_by_standard = merge(means_by_standard, mean_by_run,
by = "RUN", all.x = TRUE)

input_with_peptides <- merge(input, peptides_dict, by = "PEPTIDE", all.x = TRUE)
standards_data <- input_with_peptides[
(PeptideSequence %in% standards | PROTEIN %in% standards) &
GROUP != "0" &
!is.na(ABUNDANCE)
]
missing_standards <- standards[!standards %in% c(standards_data$PeptideSequence, standards_data$PROTEIN)]
if (length(missing_standards) > 0) {
msg <- paste("Global standard peptides or proteins,",
paste(missing_standards, collapse = ", "),
"are not in dataset. Please check whether 'nameStandards' input is correct.")
getOption("MSstatsLog")("ERROR", msg)
stop(msg)
}
standards_data[, standard := ifelse(!is.na(PeptideSequence) & PeptideSequence %in% standards,
PeptideSequence,
PROTEIN)]
means_by_standard <- standards_data[,
list(mean_abundance = mean(ABUNDANCE, na.rm = TRUE)),
by = .(RUN, standard)
]
means_by_standard <- dcast(means_by_standard,
RUN ~ standard,
value.var = "mean_abundance")
means_by_standard = data.table::melt(means_by_standard, id.vars = "RUN",
variable.name = "Standard", value.name = "ABUNDANCE")
means_by_standard[, mean_by_run := mean(ABUNDANCE, na.rm = TRUE), by = "RUN"]
Expand Down
111 changes: 111 additions & 0 deletions inst/tinytest/test_utils_normalize.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Test for .normalizeGlobalStandards function ----------------------------------

# Setup test data ---------------------------------------------------------------

create_peptide_dictionary <- function() {
data.table::data.table(
PeptideSequence = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSRD"),
PrecursorCharge = c(3, 2, 3),
PEPTIDE = c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAAAGAGAGAK_2", "AAAAAAAAAAAAVSRD_3")
)
}

create_test_input <- function(standard_intensities, peptide2_intensities, peptide3_intensities) {
# Constants
n_proteins <- 3
n_runs <- 48
n_subjects <- 4
n_fractions <- 6

# Create base structure
input <- data.table::data.table(
PROTEIN = rep(c("P1", "P1", "P3"), each = n_runs),
PEPTIDE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAAAGAGAGAK_2", "AAAAAAAAAAAAVSRD_3"),
each = n_runs),
TRANSITION = rep("NA_NA", n_proteins * n_runs),
FEATURE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3_NA_NA",
"AAAAAAAAAAAAAAGAGAGAK_2_NA_NA",
"AAAAAAAAAAAAVSRD_3_NA_NA"),
each = n_runs),
LABEL = rep("L", n_proteins * n_runs),
GROUP_ORIGINAL = rep(rep(c("Control", "Treatment"), each = n_runs/2), n_proteins),
SUBJECT_ORIGINAL = rep(paste0("Subject", rep(1:n_subjects, each = n_fractions)),
n_proteins * 2),
RUN = rep(1:n_runs, n_proteins),
GROUP = rep(rep(c("Control", "Treatment"), each = n_runs/2), n_proteins),
SUBJECT = rep(paste0("Subject", rep(1:n_subjects, each = n_fractions)),
n_proteins * 2),
FRACTION = rep(rep(1:n_fractions, n_subjects), n_proteins * 2),
INTENSITY = c(standard_intensities, peptide2_intensities, peptide3_intensities),
ANOMALYSCORES = rep(NA, n_proteins * n_runs),
originalRUN = rep(paste0("Run", 1:n_runs), n_proteins)
)

input[, ABUNDANCE := log2(INTENSITY)]
return(input)
}

# Test 1: Standards with different intensities between groups -------------------
test_different_group_intensities <- function() {
peptide_dict <- create_peptide_dictionary()
standards <- c("AAAAAAAAAAAAAAGAGAGAK")

# Control group: 262144, Treatment group: 524288
standard_intensities <- c(
rep(262144, 24), # Control (runs 1-24)
rep(524288, 24) # Treatment (runs 25-48)
)

# Non-standard peptides: all 262144
peptide3_intensities <- rep(262144, 48)

input <- create_test_input(standard_intensities, standard_intensities, peptide3_intensities)
output <- MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards)

# Verify normalization: Control runs should be shifted up, Treatment runs shifted down
control_runs <- 1:24
treatment_runs <- 25:48

# Check Control group (shifted up to match treatment standard)
control_abundance <- output[RUN %in% control_runs &
!is.na(ABUNDANCE) &
!grepl(standards, PEPTIDE)]$ABUNDANCE
expect_true(all(abs(control_abundance - 18.5) < 1e-10),
info = "Control group non-standard peptides should be normalized to 18.5")

# Check Treatment group (shifted down to match control standard)
treatment_abundance <- output[RUN %in% treatment_runs &
!is.na(ABUNDANCE) &
!grepl(standards, PEPTIDE)]$ABUNDANCE
expect_true(all(abs(treatment_abundance - 17.5) < 1e-10),
info = "Treatment group non-standard peptides should be normalized to 17.5")
}

# Test 2: Standards with alternating intensities within fractions ---------------
test_alternating_intensities_within_fractions <- function() {
peptide_dict <- create_peptide_dictionary()
standards <- c("AAAAAAAAAAAAAAGAGAGAK")

# Standard alternates between 262144 and 524288 within each fraction
standard_intensities <- rep(c(262144, 524288), 24)

# Non-standard peptides: all 262144
peptide3_intensities <- rep(262144, 48)

input <- create_test_input(standard_intensities, standard_intensities, peptide3_intensities)
output <- MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards)

# When standards vary within fractions but average to same level,
# no net normalization should occur
all_runs <- 1:48
normalized_abundance <- output[RUN %in% all_runs &
!is.na(ABUNDANCE) &
!grepl(standards, PEPTIDE)]$ABUNDANCE

expect_true(all(abs(normalized_abundance - 18) < 1e-10),
info = "No normalization should occur when standard averages are equal across fractions")
}

# Run tests ---------------------------------------------------------------------
test_different_group_intensities()
test_alternating_intensities_within_fractions()
Loading