From 71cc52678ce0549e09bc1f56a93e520140f7d30d Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 13 Jan 2026 10:45:46 -0500 Subject: [PATCH 1/8] fix(global-standards): Enable multiple precursors for the same reference peptide --- R/utils_normalize.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils_normalize.R b/R/utils_normalize.R index 70820e20..da5bda43 100644 --- a/R/utils_normalize.R +++ b/R/utils_normalize.R @@ -199,7 +199,7 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s peptide_name = unlist(peptides_dict[PeptideSequence == standards[standard_id], as.character(PEPTIDE)], FALSE, FALSE) if (length(peptide_name) > 0) { - standard = input[PEPTIDE == peptide_name, ] + standard = input[PEPTIDE %in% peptide_name, ] } else { if (standards[standard_id] %in% proteins) { standard = input[PROTEIN == standards[standard_id], ] From e4975eba91c9aa5788a4e91f97539e85685750d3 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 13 Jan 2026 11:17:23 -0500 Subject: [PATCH 2/8] make normalize by global standards more efficient --- R/utils_normalize.R | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/R/utils_normalize.R b/R/utils_normalize.R index da5bda43..ce6d2577 100644 --- a/R/utils_normalize.R +++ b/R/utils_normalize.R @@ -195,29 +195,30 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s proteins = as.character(unique(input$PROTEIN)) means_by_standard = unique(input[, list(RUN)]) - for (standard_id in seq_along(standards)) { - peptide_name = unlist(peptides_dict[PeptideSequence == standards[standard_id], - as.character(PEPTIDE)], FALSE, FALSE) - if (length(peptide_name) > 0) { - standard = input[PEPTIDE %in% peptide_name, ] - } else { - if (standards[standard_id] %in% proteins) { - standard = input[PROTEIN == standards[standard_id], ] - } else { - msg = paste("global standard peptides or proteins, ", - 
standards[standard_id],", is not in dataset.", - "Please check whether 'nameStandards' input is correct or not.") - getOption("MSstatsLog")("ERROR", msg) - stop(msg) - } - } - mean_by_run = standard[GROUP != "0" & !is.na(ABUNDANCE), - list(mean_abundance = mean(ABUNDANCE, na.rm = TRUE)), - by = "RUN"] - colnames(mean_by_run)[2] = paste0("meanStandard", standard_id) - means_by_standard = merge(means_by_standard, mean_by_run, - by = "RUN", all.x = TRUE) + input_with_peptides <- merge(input, peptides_dict, by = "PEPTIDE", all.x = TRUE) + standards_data <- input_with_peptides[ + (PeptideSequence %in% standards | PROTEIN %in% standards) & + GROUP != "0" & + !is.na(ABUNDANCE) + ] + missing_standards <- standards[!standards %in% c(standards_data$PeptideSequence, standards_data$PROTEIN)] + if (length(missing_standards) > 0) { + msg <- paste("Global standard peptides or proteins,", + paste(missing_standards, collapse = ", "), + "are not in dataset. Please check whether 'nameStandards' input is correct.") + getOption("MSstatsLog")("ERROR", msg) + stop(msg) } + standards_data[, standard := ifelse(!is.na(PeptideSequence) & PeptideSequence %in% standards, + PeptideSequence, + PROTEIN)] + means_by_standard <- standards_data[, + list(mean_abundance = mean(ABUNDANCE, na.rm = TRUE)), + by = .(RUN, standard) + ] + means_by_standard <- dcast(means_by_standard, + RUN ~ standard, + value.var = "mean_abundance") means_by_standard = data.table::melt(means_by_standard, id.vars = "RUN", variable.name = "Standard", value.name = "ABUNDANCE") means_by_standard[, mean_by_run := mean(ABUNDANCE, na.rm = TRUE), by = "RUN"] From fbc8879ae4500202d85fec113304482504066046 Mon Sep 17 00:00:00 2001 From: tonywu1999 Date: Wed, 14 Jan 2026 13:05:27 -0500 Subject: [PATCH 3/8] tests: add initial mock data for unit tests --- inst/tinytest/test_utils_normalize.R | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 inst/tinytest/test_utils_normalize.R diff --git 
a/inst/tinytest/test_utils_normalize.R b/inst/tinytest/test_utils_normalize.R new file mode 100644 index 00000000..56e27c18 --- /dev/null +++ b/inst/tinytest/test_utils_normalize.R @@ -0,0 +1,26 @@ +peptide_dict <- data.table::data.table( + PeptideSequence = c( "AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD"), + PrecursorCharge = c( 3, 2, 3), + PEPTIDE = c( "AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3") +) + +input <- data.table( + PROTEIN = c( "P1", "P1", "P1", "P1", "P1", "P1" ), + PEPTIDE = c( "PEP1", "PEP1", "PEP1", "PEP1", "PEP1", "PEP1" ), + TRANSITION = c( "NA_NA", "NA_NA", "NA_NA", "NA_NA", "NA_NA", "NA_NA" ), + FEATURE = c( "PEP1_2_NA_NA", "PEP1_3_NA_NA", "PEP1_2_NA_NA", "PEP1_2_NA_NA", "PEP1_3_NA_NA", "PEP1_2_NA_NA" ), + LABEL = c( "L", "L", "L", "L", "L", "L" ), + GROUP_ORIGINAL = c( "0hr", "0hr", "0hr", "0hr", "0hr", "0hr" ), + SUBJECT_ORIGINAL = c( "0hr", "0hr", "0hr", "0hr", "0hr", "0hr" ), + RUN = c( 1, 1, 1, 1, 1, 1 ), + GROUP = c( "0hr", "0hr", "0hr", "0hr", "0hr", "0hr" ), + SUBJECT = c( "0hr", "0hr", "0hr", "0hr", "0hr", "0hr" ), + FRACTION = c( 1, 1, 1, 1, 1, 1 ), + INTENSITY = c( 180697888, 674297.25, NA, 267224.25, NA, NA ), + ANOMALYSCORES = c( NA, NA, NA, NA, NA, NA ), + ABUNDANCE = c( 27.429, 19.363, NA, 18.028, NA, NA ), + originalRUN = c( "Run1", "Run1", "Run1", "Run1", "Run1", "Run1" ) +) + +standards = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD") +output = MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards) From c34ca7444739bb4758dd7f3dbbbf255bdefe24b4 Mon Sep 17 00:00:00 2001 From: tonywu1999 Date: Wed, 14 Jan 2026 13:24:53 -0500 Subject: [PATCH 4/8] modify test dataset --- inst/tinytest/test_utils_normalize.R | 84 +++++++++++++++++++++------- 1 file changed, 63 insertions(+), 21 deletions(-) diff --git a/inst/tinytest/test_utils_normalize.R b/inst/tinytest/test_utils_normalize.R index 56e27c18..83b129a6 100644 --- a/inst/tinytest/test_utils_normalize.R +++ 
b/inst/tinytest/test_utils_normalize.R @@ -1,26 +1,68 @@ -peptide_dict <- data.table::data.table( - PeptideSequence = c( "AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD"), - PrecursorCharge = c( 3, 2, 3), - PEPTIDE = c( "AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3") +library(data.table) + +# Peptide dictionary +peptide_dict <- data.table( + PeptideSequence = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD"), + PrecursorCharge = c(3, 2, 3), + PEPTIDE = c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3") ) +# Standards (matching peptide sequences) +standards = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD") + +# Create input data with 4 runs, varying fractions (1-3), 2 groups +set.seed(123) input <- data.table( - PROTEIN = c( "P1", "P1", "P1", "P1", "P1", "P1" ), - PEPTIDE = c( "PEP1", "PEP1", "PEP1", "PEP1", "PEP1", "PEP1" ), - TRANSITION = c( "NA_NA", "NA_NA", "NA_NA", "NA_NA", "NA_NA", "NA_NA" ), - FEATURE = c( "PEP1_2_NA_NA", "PEP1_3_NA_NA", "PEP1_2_NA_NA", "PEP1_2_NA_NA", "PEP1_3_NA_NA", "PEP1_2_NA_NA" ), - LABEL = c( "L", "L", "L", "L", "L", "L" ), - GROUP_ORIGINAL = c( "0hr", "0hr", "0hr", "0hr", "0hr", "0hr" ), - SUBJECT_ORIGINAL = c( "0hr", "0hr", "0hr", "0hr", "0hr", "0hr" ), - RUN = c( 1, 1, 1, 1, 1, 1 ), - GROUP = c( "0hr", "0hr", "0hr", "0hr", "0hr", "0hr" ), - SUBJECT = c( "0hr", "0hr", "0hr", "0hr", "0hr", "0hr" ), - FRACTION = c( 1, 1, 1, 1, 1, 1 ), - INTENSITY = c( 180697888, 674297.25, NA, 267224.25, NA, NA ), - ANOMALYSCORES = c( NA, NA, NA, NA, NA, NA ), - ABUNDANCE = c( 27.429, 19.363, NA, 18.028, NA, NA ), - originalRUN = c( "Run1", "Run1", "Run1", "Run1", "Run1", "Run1" ) + PROTEIN = rep(c("P1", "P2", "P3"), each = 48), # 3 proteins + PEPTIDE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3"), each = 48), + TRANSITION = rep("NA_NA", 144), + FEATURE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3_NA_NA", "AAAAAAAAAAAAVSR_2_NA_NA", 
"AAAAAAAAAAAAVSRD_3_NA_NA"), each = 48), + LABEL = rep("L", 144), + GROUP_ORIGINAL = rep(rep(c("Control", "Treatment"), each = 24), 3), + SUBJECT_ORIGINAL = rep(rep(c("Control", "Treatment"), each = 24), 3), + RUN = rep(rep(1:4, each = 6), 6), # 4 runs + GROUP = rep(rep(c("Control", "Treatment"), each = 24), 3), + SUBJECT = rep(rep(c("Control", "Treatment"), each = 24), 3), + FRACTION = rep(rep(c(1, 1, 2, 2, 3, 3), 4), 6), # Fractions 1-3 + INTENSITY = c( + # Standard 1 (AAAAAAAAAAAAAAGAGAGAK_3) + 150000, 148000, 152000, 149000, 151000, 147000, # Run 1 + 145000, 143000, 147000, 144000, 146000, 142000, # Run 2 + 200000, 198000, 202000, 199000, 201000, 197000, # Run 3 + 195000, 193000, 197000, 194000, 196000, 192000, # Run 4 + 160000, 158000, 162000, 159000, 161000, 157000, # Run 1 treatment + 155000, 153000, 157000, 154000, 156000, 152000, # Run 2 treatment + 210000, 208000, 212000, 209000, 211000, 207000, # Run 3 treatment + 205000, 203000, 207000, 204000, 206000, 202000, # Run 4 treatment + # Standard 2 (AAAAAAAAAAAAVSR_2) + 500000, 498000, 502000, 499000, 501000, 497000, + 495000, 493000, 497000, 494000, 496000, 492000, + 550000, 548000, 552000, 549000, 551000, 547000, + 545000, 543000, 547000, 544000, 546000, 542000, + 510000, 508000, 512000, 509000, 511000, 507000, + 505000, 503000, 507000, 504000, 506000, 502000, + 560000, 558000, 562000, 559000, 561000, 557000, + 555000, 553000, 557000, 554000, 556000, 552000, + # Standard 3 (AAAAAAAAAAAAVSRD_3) + 250000, 248000, 252000, 249000, 251000, 247000, + 245000, 243000, 247000, 244000, 246000, 242000, + 300000, 298000, 302000, 299000, 301000, 297000, + 295000, 293000, 297000, 294000, 296000, 292000, + 260000, 258000, 262000, 259000, 261000, 257000, + 255000, 253000, 257000, 254000, 256000, 252000, + 310000, 308000, 312000, 309000, 311000, 307000, + 305000, 303000, 307000, 304000, 306000, 302000 + ), + ANOMALYSCORES = rep(NA, 144), + originalRUN = rep(paste0("Run", rep(1:4, each = 6)), 6) ) -standards = 
c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD") -output = MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards) +# Calculate ABUNDANCE as log2(INTENSITY) +input[, ABUNDANCE := log2(INTENSITY)] + +# Add some missing values for realism +input[c(5, 12, 23, 34, 45, 67, 89, 111), ABUNDANCE := NA] +input[c(5, 12, 23, 34, 45, 67, 89, 111), INTENSITY := NA] + +# Test the function +output <- MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards) From 6daccd15e8e1b37a3e7d78e3ab207c79febe4940 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Thu, 29 Jan 2026 14:58:40 -0500 Subject: [PATCH 5/8] finalize unit tests for global reference normalization --- inst/tinytest/test_utils_normalize.R | 169 +++++++++++++++++---------- 1 file changed, 107 insertions(+), 62 deletions(-) diff --git a/inst/tinytest/test_utils_normalize.R b/inst/tinytest/test_utils_normalize.R index 83b129a6..ba6773e0 100644 --- a/inst/tinytest/test_utils_normalize.R +++ b/inst/tinytest/test_utils_normalize.R @@ -1,68 +1,113 @@ -library(data.table) +# Test for .normalizeGlobalStandards function ---------------------------------- -# Peptide dictionary -peptide_dict <- data.table( - PeptideSequence = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD"), - PrecursorCharge = c(3, 2, 3), - PEPTIDE = c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3") -) +# Setup test data --------------------------------------------------------------- -# Standards (matching peptide sequences) -standards = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD") +create_peptide_dictionary <- function() { + data.table::data.table( + PeptideSequence = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD"), + PrecursorCharge = c(3, 2, 3), + PEPTIDE = c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3") + ) +} -# Create input data with 4 runs, varying fractions (1-3), 2 groups -set.seed(123) -input <- data.table( - PROTEIN = 
rep(c("P1", "P2", "P3"), each = 48), # 3 proteins - PEPTIDE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3"), each = 48), - TRANSITION = rep("NA_NA", 144), - FEATURE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3_NA_NA", "AAAAAAAAAAAAVSR_2_NA_NA", "AAAAAAAAAAAAVSRD_3_NA_NA"), each = 48), - LABEL = rep("L", 144), - GROUP_ORIGINAL = rep(rep(c("Control", "Treatment"), each = 24), 3), - SUBJECT_ORIGINAL = rep(rep(c("Control", "Treatment"), each = 24), 3), - RUN = rep(rep(1:4, each = 6), 6), # 4 runs - GROUP = rep(rep(c("Control", "Treatment"), each = 24), 3), - SUBJECT = rep(rep(c("Control", "Treatment"), each = 24), 3), - FRACTION = rep(rep(c(1, 1, 2, 2, 3, 3), 4), 6), # Fractions 1-3 - INTENSITY = c( - # Standard 1 (AAAAAAAAAAAAAAGAGAGAK_3) - 150000, 148000, 152000, 149000, 151000, 147000, # Run 1 - 145000, 143000, 147000, 144000, 146000, 142000, # Run 2 - 200000, 198000, 202000, 199000, 201000, 197000, # Run 3 - 195000, 193000, 197000, 194000, 196000, 192000, # Run 4 - 160000, 158000, 162000, 159000, 161000, 157000, # Run 1 treatment - 155000, 153000, 157000, 154000, 156000, 152000, # Run 2 treatment - 210000, 208000, 212000, 209000, 211000, 207000, # Run 3 treatment - 205000, 203000, 207000, 204000, 206000, 202000, # Run 4 treatment - # Standard 2 (AAAAAAAAAAAAVSR_2) - 500000, 498000, 502000, 499000, 501000, 497000, - 495000, 493000, 497000, 494000, 496000, 492000, - 550000, 548000, 552000, 549000, 551000, 547000, - 545000, 543000, 547000, 544000, 546000, 542000, - 510000, 508000, 512000, 509000, 511000, 507000, - 505000, 503000, 507000, 504000, 506000, 502000, - 560000, 558000, 562000, 559000, 561000, 557000, - 555000, 553000, 557000, 554000, 556000, 552000, - # Standard 3 (AAAAAAAAAAAAVSRD_3) - 250000, 248000, 252000, 249000, 251000, 247000, - 245000, 243000, 247000, 244000, 246000, 242000, - 300000, 298000, 302000, 299000, 301000, 297000, - 295000, 293000, 297000, 294000, 296000, 292000, - 260000, 258000, 262000, 259000, 261000, 257000, - 255000, 
253000, 257000, 254000, 256000, 252000, - 310000, 308000, 312000, 309000, 311000, 307000, - 305000, 303000, 307000, 304000, 306000, 302000 - ), - ANOMALYSCORES = rep(NA, 144), - originalRUN = rep(paste0("Run", rep(1:4, each = 6)), 6) -) +create_test_input <- function(standard_intensities, peptide2_intensities, peptide3_intensities) { + # Constants + n_proteins <- 3 + n_runs <- 48 + n_subjects <- 4 + n_fractions <- 6 + + # Create base structure + input <- data.table::data.table( + PROTEIN = rep(c("P1", "P2", "P3"), each = n_runs), + PEPTIDE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3"), + each = n_runs), + TRANSITION = rep("NA_NA", n_proteins * n_runs), + FEATURE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3_NA_NA", + "AAAAAAAAAAAAVSR_2_NA_NA", + "AAAAAAAAAAAAVSRD_3_NA_NA"), + each = n_runs), + LABEL = rep("L", n_proteins * n_runs), + GROUP_ORIGINAL = rep(rep(c("Control", "Treatment"), each = n_runs/2), n_proteins), + SUBJECT_ORIGINAL = rep(paste0("Subject", rep(1:n_subjects, each = n_fractions)), + n_proteins * 2), + RUN = rep(1:n_runs, n_proteins), + GROUP = rep(rep(c("Control", "Treatment"), each = n_runs/2), n_proteins), + SUBJECT = rep(paste0("Subject", rep(1:n_subjects, each = n_fractions)), + n_proteins * 2), + FRACTION = rep(rep(1:n_fractions, n_subjects), n_proteins * 2), + INTENSITY = c(standard_intensities, peptide2_intensities, peptide3_intensities), + ANOMALYSCORES = rep(NA, n_proteins * n_runs), + originalRUN = rep(paste0("Run", 1:n_runs), n_proteins) + ) + + input[, ABUNDANCE := log2(INTENSITY)] + return(input) +} -# Calculate ABUNDANCE as log2(INTENSITY) -input[, ABUNDANCE := log2(INTENSITY)] +# Test 1: Standards with different intensities between groups ------------------- +test_different_group_intensities <- function() { + peptide_dict <- create_peptide_dictionary() + standards <- c("AAAAAAAAAAAAAAGAGAGAK") + + # Control group: 262144, Treatment group: 524288 + standard_intensities <- c( + rep(262144, 24), # Control (runs 1-24) + 
rep(524288, 24) # Treatment (runs 25-48) + ) + + # Non-standard peptides: all 262144 + peptide2_intensities <- rep(262144, 48) + peptide3_intensities <- rep(262144, 48) + + input <- create_test_input(standard_intensities, peptide2_intensities, peptide3_intensities) + output <- MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards) + + # Verify normalization: Control runs should be shifted up, Treatment runs shifted down + control_runs <- 1:24 + treatment_runs <- 25:48 + + # Check Control group (shifted up to match treatment standard) + control_abundance <- output[RUN %in% control_runs & + !is.na(ABUNDANCE) & + !grepl(standards, PEPTIDE)]$ABUNDANCE + expect_true(all(abs(control_abundance - 18.5) < 1e-10), + info = "Control group non-standard peptides should be normalized to 18.5") + + # Check Treatment group (shifted down to match control standard) + treatment_abundance <- output[RUN %in% treatment_runs & + !is.na(ABUNDANCE) & + !grepl(standards, PEPTIDE)]$ABUNDANCE + expect_true(all(abs(treatment_abundance - 17.5) < 1e-10), + info = "Treatment group non-standard peptides should be normalized to 17.5") +} -# Add some missing values for realism -input[c(5, 12, 23, 34, 45, 67, 89, 111), ABUNDANCE := NA] -input[c(5, 12, 23, 34, 45, 67, 89, 111), INTENSITY := NA] +# Test 2: Standards with alternating intensities within fractions --------------- +test_alternating_intensities_within_fractions <- function() { + peptide_dict <- create_peptide_dictionary() + standards <- c("AAAAAAAAAAAAAAGAGAGAK") + + # Standard alternates between 262144 (odd runs) and 524288 (even runs); with FRACTION cycling 1:6 over runs it is constant within each fraction + standard_intensities <- rep(c(262144, 524288), 24) + + # Non-standard peptides: all 262144 + peptide2_intensities <- rep(262144, 48) + peptide3_intensities <- rep(262144, 48) + + input <- create_test_input(standard_intensities, peptide2_intensities, peptide3_intensities) + output <- MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards) + + # When standards vary within fractions but
average to same level, + # no net normalization should occur + all_runs <- 1:48 + normalized_abundance <- output[RUN %in% all_runs & + !is.na(ABUNDANCE) & + !grepl(standards, PEPTIDE)]$ABUNDANCE + + expect_true(all(abs(normalized_abundance - 18) < 1e-10), + info = "No normalization should occur when standard averages are equal across fractions") +} -# Test the function -output <- MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards) +# Run tests --------------------------------------------------------------------- +test_different_group_intensities() +test_alternating_intensities_within_fractions() \ No newline at end of file From e8ec4064e6f64036058aad0b1c3857702b51cfe5 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Thu, 29 Jan 2026 15:01:21 -0500 Subject: [PATCH 6/8] enable multiple precursors for same peptide in unit tests --- inst/tinytest/test_utils_normalize.R | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/inst/tinytest/test_utils_normalize.R b/inst/tinytest/test_utils_normalize.R index ba6773e0..9a44c393 100644 --- a/inst/tinytest/test_utils_normalize.R +++ b/inst/tinytest/test_utils_normalize.R @@ -4,9 +4,9 @@ create_peptide_dictionary <- function() { data.table::data.table( - PeptideSequence = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD"), + PeptideSequence = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSRD"), PrecursorCharge = c(3, 2, 3), - PEPTIDE = c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3") + PEPTIDE = c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAAAGAGAGAK_2", "AAAAAAAAAAAAVSRD_3") ) } @@ -19,12 +19,12 @@ create_test_input <- function(standard_intensities, peptide2_intensities, peptid # Create base structure input <- data.table::data.table( - PROTEIN = rep(c("P1", "P2", "P3"), each = n_runs), - PEPTIDE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3"), + PROTEIN = rep(c("P1", "P1", "P3"), each = n_runs), + PEPTIDE 
= rep(c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAAAGAGAGAK_2", "AAAAAAAAAAAAVSRD_3"), each = n_runs), TRANSITION = rep("NA_NA", n_proteins * n_runs), FEATURE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3_NA_NA", - "AAAAAAAAAAAAVSR_2_NA_NA", + "AAAAAAAAAAAAAAGAGAGAK_2_NA_NA", "AAAAAAAAAAAAVSRD_3_NA_NA"), each = n_runs), LABEL = rep("L", n_proteins * n_runs), @@ -57,10 +57,9 @@ test_different_group_intensities <- function() { ) # Non-standard peptides: all 262144 - peptide2_intensities <- rep(262144, 48) peptide3_intensities <- rep(262144, 48) - input <- create_test_input(standard_intensities, peptide2_intensities, peptide3_intensities) + input <- create_test_input(standard_intensities, standard_intensities, peptide3_intensities) output <- MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards) # Verify normalization: Control runs should be shifted up, Treatment runs shifted down @@ -91,10 +90,9 @@ test_alternating_intensities_within_fractions <- function() { standard_intensities <- rep(c(262144, 524288), 24) # Non-standard peptides: all 262144 - peptide2_intensities <- rep(262144, 48) peptide3_intensities <- rep(262144, 48) - input <- create_test_input(standard_intensities, peptide2_intensities, peptide3_intensities) + input <- create_test_input(standard_intensities, standard_intensities, peptide3_intensities) output <- MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards) # When standards vary within fractions but average to same level, From a8cf7d7578f7ea289232f0382847f83d524dc92b Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Thu, 29 Jan 2026 15:20:34 -0500 Subject: [PATCH 7/8] remove dead code --- R/utils_normalize.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/utils_normalize.R b/R/utils_normalize.R index ce6d2577..13563717 100644 --- a/R/utils_normalize.R +++ b/R/utils_normalize.R @@ -194,7 +194,6 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s Standard = FRACTION = LABEL = ABUNDANCE = RUN = GROUP = NULL 
proteins = as.character(unique(input$PROTEIN)) - means_by_standard = unique(input[, list(RUN)]) input_with_peptides <- merge(input, peptides_dict, by = "PEPTIDE", all.x = TRUE) standards_data <- input_with_peptides[ (PeptideSequence %in% standards | PROTEIN %in% standards) & From 94865cc2073661b4aa3225e62d588a068b94cc06 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Thu, 29 Jan 2026 15:27:12 -0500 Subject: [PATCH 8/8] remove more dead code --- R/utils_normalize.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/utils_normalize.R b/R/utils_normalize.R index 13563717..fe2c0f17 100644 --- a/R/utils_normalize.R +++ b/R/utils_normalize.R @@ -192,8 +192,7 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s .normalizeGlobalStandards = function(input, peptides_dict, standards) { PeptideSequence = PEPTIDE = PROTEIN = median_by_fraction = NULL Standard = FRACTION = LABEL = ABUNDANCE = RUN = GROUP = NULL - - proteins = as.character(unique(input$PROTEIN)) + input_with_peptides <- merge(input, peptides_dict, by = "PEPTIDE", all.x = TRUE) standards_data <- input_with_peptides[ (PeptideSequence %in% standards | PROTEIN %in% standards) &