From 71cc52678ce0549e09bc1f56a93e520140f7d30d Mon Sep 17 00:00:00 2001
From: Tony Wu <wu.anthon@northeastern.edu>
Date: Tue, 13 Jan 2026 10:45:46 -0500
Subject: [PATCH 1/8] fix(global-standards): Enable multiple precursors for the
 same reference peptide

---
 R/utils_normalize.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/utils_normalize.R b/R/utils_normalize.R
index 70820e20..da5bda43 100644
--- a/R/utils_normalize.R
+++ b/R/utils_normalize.R
@@ -199,7 +199,7 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s
         peptide_name = unlist(peptides_dict[PeptideSequence == standards[standard_id],
                                             as.character(PEPTIDE)], FALSE, FALSE)
         if (length(peptide_name) > 0) {
-            standard = input[PEPTIDE == peptide_name, ]
+            standard = input[PEPTIDE %in% peptide_name, ]
         } else {
             if (standards[standard_id] %in% proteins) {
                 standard = input[PROTEIN == standards[standard_id], ]

From e4975eba91c9aa5788a4e91f97539e85685750d3 Mon Sep 17 00:00:00 2001
From: Tony Wu <wu.anthon@northeastern.edu>
Date: Tue, 13 Jan 2026 11:17:23 -0500
Subject: [PATCH 2/8] make normalization by global standards more efficient

---
 R/utils_normalize.R | 45 +++++++++++++++++++++++----------------------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/R/utils_normalize.R b/R/utils_normalize.R
index da5bda43..ce6d2577 100644
--- a/R/utils_normalize.R
+++ b/R/utils_normalize.R
@@ -195,29 +195,30 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s
     
     proteins = as.character(unique(input$PROTEIN))
     means_by_standard = unique(input[, list(RUN)])
-    for (standard_id in seq_along(standards)) {
-        peptide_name = unlist(peptides_dict[PeptideSequence == standards[standard_id],
-                                            as.character(PEPTIDE)], FALSE, FALSE)
-        if (length(peptide_name) > 0) {
-            standard = input[PEPTIDE %in% peptide_name, ]
-        } else {
-            if (standards[standard_id] %in% proteins) {
-                standard = input[PROTEIN == standards[standard_id], ]
-            } else {
-                msg = paste("global standard peptides or proteins, ",
-                            standards[standard_id],", is not in dataset.",
-                            "Please check whether 'nameStandards' input is correct or not.")
-                getOption("MSstatsLog")("ERROR", msg)
-                stop(msg)
-            }
-        }
-        mean_by_run = standard[GROUP != "0" & !is.na(ABUNDANCE),
-                               list(mean_abundance = mean(ABUNDANCE, na.rm = TRUE)),
-                               by = "RUN"]
-        colnames(mean_by_run)[2] = paste0("meanStandard", standard_id)
-        means_by_standard = merge(means_by_standard, mean_by_run,
-                                  by = "RUN", all.x = TRUE)
+    input_with_peptides <- merge(input, peptides_dict, by = "PEPTIDE", all.x = TRUE)
+    standards_data <- input_with_peptides[
+        (PeptideSequence %in% standards | PROTEIN %in% standards) & 
+            GROUP != "0" & 
+            !is.na(ABUNDANCE)
+    ]
+    missing_standards <- standards[!standards %in% c(standards_data$PeptideSequence, standards_data$PROTEIN)]
+    if (length(missing_standards) > 0) {
+        msg <- paste("Global standard peptides or proteins,",
+                     paste(missing_standards, collapse = ", "),
+                     "are not in dataset. Please check whether 'nameStandards' input is correct.")
+        getOption("MSstatsLog")("ERROR", msg)
+        stop(msg)
     }
+    standards_data[, standard := ifelse(!is.na(PeptideSequence) & PeptideSequence %in% standards,
+                                        PeptideSequence,
+                                        PROTEIN)]
+    means_by_standard <- standards_data[, 
+                                        list(mean_abundance = mean(ABUNDANCE, na.rm = TRUE)),
+                                        by = .(RUN, standard)
+    ]
+    means_by_standard <- dcast(means_by_standard, 
+                               RUN ~ standard, 
+                               value.var = "mean_abundance")
     means_by_standard = data.table::melt(means_by_standard, id.vars = "RUN",
                                          variable.name = "Standard", value.name = "ABUNDANCE")
     means_by_standard[, mean_by_run := mean(ABUNDANCE, na.rm = TRUE), by = "RUN"]

From fbc8879ae4500202d85fec113304482504066046 Mon Sep 17 00:00:00 2001
From: tonywu1999 <wu.anthon@northeastern.edu>
Date: Wed, 14 Jan 2026 13:05:27 -0500
Subject: [PATCH 3/8] tests: add initial mock data for unit tests

---
 inst/tinytest/test_utils_normalize.R | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 inst/tinytest/test_utils_normalize.R

diff --git a/inst/tinytest/test_utils_normalize.R b/inst/tinytest/test_utils_normalize.R
new file mode 100644
index 00000000..56e27c18
--- /dev/null
+++ b/inst/tinytest/test_utils_normalize.R
@@ -0,0 +1,26 @@
+peptide_dict <- data.table::data.table(
+  PeptideSequence = c( "AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD"),
+  PrecursorCharge = c( 3, 2, 3),
+  PEPTIDE = c( "AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3")
+)
+
+input <- data.table(
+  PROTEIN = c( "P1", "P1", "P1", "P1", "P1", "P1" ),
+  PEPTIDE = c( "PEP1", "PEP1", "PEP1", "PEP1", "PEP1", "PEP1" ),
+  TRANSITION = c( "NA_NA", "NA_NA", "NA_NA", "NA_NA", "NA_NA", "NA_NA" ),
+  FEATURE = c( "PEP1_2_NA_NA", "PEP1_3_NA_NA", "PEP1_2_NA_NA", "PEP1_2_NA_NA", "PEP1_3_NA_NA", "PEP1_2_NA_NA" ),
+  LABEL = c( "L", "L", "L", "L", "L", "L" ),
+  GROUP_ORIGINAL = c( "0hr", "0hr", "0hr", "0hr", "0hr", "0hr" ),
+  SUBJECT_ORIGINAL = c( "0hr", "0hr", "0hr", "0hr", "0hr", "0hr" ),
+  RUN = c( 1, 1, 1, 1, 1, 1 ),
+  GROUP = c( "0hr", "0hr", "0hr", "0hr", "0hr", "0hr" ),
+  SUBJECT = c( "0hr", "0hr", "0hr", "0hr", "0hr", "0hr" ),
+  FRACTION = c( 1, 1, 1, 1, 1, 1 ),
+  INTENSITY = c( 180697888, 674297.25, NA, 267224.25, NA, NA ),
+  ANOMALYSCORES = c( NA, NA, NA, NA, NA, NA ),
+  ABUNDANCE = c( 27.429, 19.363, NA, 18.028, NA, NA ),
+  originalRUN = c( "Run1", "Run1", "Run1", "Run1", "Run1", "Run1" )
+)
+
+standards = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD")
+output = MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards)

From c34ca7444739bb4758dd7f3dbbbf255bdefe24b4 Mon Sep 17 00:00:00 2001
From: tonywu1999 <wu.anthon@northeastern.edu>
Date: Wed, 14 Jan 2026 13:24:53 -0500
Subject: [PATCH 4/8] modify test dataset

---
 inst/tinytest/test_utils_normalize.R | 84 +++++++++++++++++++++-------
 1 file changed, 63 insertions(+), 21 deletions(-)

diff --git a/inst/tinytest/test_utils_normalize.R b/inst/tinytest/test_utils_normalize.R
index 56e27c18..83b129a6 100644
--- a/inst/tinytest/test_utils_normalize.R
+++ b/inst/tinytest/test_utils_normalize.R
@@ -1,26 +1,68 @@
-peptide_dict <- data.table::data.table(
-  PeptideSequence = c( "AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD"),
-  PrecursorCharge = c( 3, 2, 3),
-  PEPTIDE = c( "AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3")
+library(data.table)
+
+# Peptide dictionary
+peptide_dict <- data.table(
+  PeptideSequence = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD"),
+  PrecursorCharge = c(3, 2, 3),
+  PEPTIDE = c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3")
 )
 
+# Standards (matching peptide sequences)
+standards = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD")
+
+# Create input data with 4 runs, varying fractions (1-3), 2 groups
+set.seed(123)
 input <- data.table(
-  PROTEIN = c( "P1", "P1", "P1", "P1", "P1", "P1" ),
-  PEPTIDE = c( "PEP1", "PEP1", "PEP1", "PEP1", "PEP1", "PEP1" ),
-  TRANSITION = c( "NA_NA", "NA_NA", "NA_NA", "NA_NA", "NA_NA", "NA_NA" ),
-  FEATURE = c( "PEP1_2_NA_NA", "PEP1_3_NA_NA", "PEP1_2_NA_NA", "PEP1_2_NA_NA", "PEP1_3_NA_NA", "PEP1_2_NA_NA" ),
-  LABEL = c( "L", "L", "L", "L", "L", "L" ),
-  GROUP_ORIGINAL = c( "0hr", "0hr", "0hr", "0hr", "0hr", "0hr" ),
-  SUBJECT_ORIGINAL = c( "0hr", "0hr", "0hr", "0hr", "0hr", "0hr" ),
-  RUN = c( 1, 1, 1, 1, 1, 1 ),
-  GROUP = c( "0hr", "0hr", "0hr", "0hr", "0hr", "0hr" ),
-  SUBJECT = c( "0hr", "0hr", "0hr", "0hr", "0hr", "0hr" ),
-  FRACTION = c( 1, 1, 1, 1, 1, 1 ),
-  INTENSITY = c( 180697888, 674297.25, NA, 267224.25, NA, NA ),
-  ANOMALYSCORES = c( NA, NA, NA, NA, NA, NA ),
-  ABUNDANCE = c( 27.429, 19.363, NA, 18.028, NA, NA ),
-  originalRUN = c( "Run1", "Run1", "Run1", "Run1", "Run1", "Run1" )
+  PROTEIN = rep(c("P1", "P2", "P3"), each = 48),  # 3 proteins
+  PEPTIDE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3"), each = 48),
+  TRANSITION = rep("NA_NA", 144),
+  FEATURE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3_NA_NA", "AAAAAAAAAAAAVSR_2_NA_NA", "AAAAAAAAAAAAVSRD_3_NA_NA"), each = 48),
+  LABEL = rep("L", 144),
+  GROUP_ORIGINAL = rep(rep(c("Control", "Treatment"), each = 24), 3),
+  SUBJECT_ORIGINAL = rep(rep(c("Control", "Treatment"), each = 24), 3),
+  RUN = rep(rep(1:4, each = 6), 6),  # 4 runs
+  GROUP = rep(rep(c("Control", "Treatment"), each = 24), 3),
+  SUBJECT = rep(rep(c("Control", "Treatment"), each = 24), 3),
+  FRACTION = rep(rep(c(1, 1, 2, 2, 3, 3), 4), 6),  # Fractions 1-3
+  INTENSITY = c(
+    # Standard 1 (AAAAAAAAAAAAAAGAGAGAK_3)
+    150000, 148000, 152000, 149000, 151000, 147000,  # Run 1
+    145000, 143000, 147000, 144000, 146000, 142000,  # Run 2
+    200000, 198000, 202000, 199000, 201000, 197000,  # Run 3
+    195000, 193000, 197000, 194000, 196000, 192000,  # Run 4
+    160000, 158000, 162000, 159000, 161000, 157000,  # Run 1 treatment
+    155000, 153000, 157000, 154000, 156000, 152000,  # Run 2 treatment
+    210000, 208000, 212000, 209000, 211000, 207000,  # Run 3 treatment
+    205000, 203000, 207000, 204000, 206000, 202000,  # Run 4 treatment
+    # Standard 2 (AAAAAAAAAAAAVSR_2)
+    500000, 498000, 502000, 499000, 501000, 497000,
+    495000, 493000, 497000, 494000, 496000, 492000,
+    550000, 548000, 552000, 549000, 551000, 547000,
+    545000, 543000, 547000, 544000, 546000, 542000,
+    510000, 508000, 512000, 509000, 511000, 507000,
+    505000, 503000, 507000, 504000, 506000, 502000,
+    560000, 558000, 562000, 559000, 561000, 557000,
+    555000, 553000, 557000, 554000, 556000, 552000,
+    # Standard 3 (AAAAAAAAAAAAVSRD_3)
+    250000, 248000, 252000, 249000, 251000, 247000,
+    245000, 243000, 247000, 244000, 246000, 242000,
+    300000, 298000, 302000, 299000, 301000, 297000,
+    295000, 293000, 297000, 294000, 296000, 292000,
+    260000, 258000, 262000, 259000, 261000, 257000,
+    255000, 253000, 257000, 254000, 256000, 252000,
+    310000, 308000, 312000, 309000, 311000, 307000,
+    305000, 303000, 307000, 304000, 306000, 302000
+  ),
+  ANOMALYSCORES = rep(NA, 144),
+  originalRUN = rep(paste0("Run", rep(1:4, each = 6)), 6)
 )
 
-standards = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD")
-output = MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards)
+# Calculate ABUNDANCE as log2(INTENSITY)
+input[, ABUNDANCE := log2(INTENSITY)]
+
+# Add some missing values for realism
+input[c(5, 12, 23, 34, 45, 67, 89, 111), ABUNDANCE := NA]
+input[c(5, 12, 23, 34, 45, 67, 89, 111), INTENSITY := NA]
+
+# Test the function
+output <- MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards)

From 6daccd15e8e1b37a3e7d78e3ab207c79febe4940 Mon Sep 17 00:00:00 2001
From: Tony Wu <wu.anthon@northeastern.edu>
Date: Thu, 29 Jan 2026 14:58:40 -0500
Subject: [PATCH 5/8] finalize unit tests for global reference normalization

---
 inst/tinytest/test_utils_normalize.R | 169 +++++++++++++++++----------
 1 file changed, 107 insertions(+), 62 deletions(-)

diff --git a/inst/tinytest/test_utils_normalize.R b/inst/tinytest/test_utils_normalize.R
index 83b129a6..ba6773e0 100644
--- a/inst/tinytest/test_utils_normalize.R
+++ b/inst/tinytest/test_utils_normalize.R
@@ -1,68 +1,113 @@
-library(data.table)
+# Test for .normalizeGlobalStandards function ----------------------------------
 
-# Peptide dictionary
-peptide_dict <- data.table(
-  PeptideSequence = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD"),
-  PrecursorCharge = c(3, 2, 3),
-  PEPTIDE = c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3")
-)
+# Setup test data ---------------------------------------------------------------
 
-# Standards (matching peptide sequences)
-standards = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD")
+create_peptide_dictionary <- function() {
+    data.table::data.table(
+        PeptideSequence = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD"),
+        PrecursorCharge = c(3, 2, 3),
+        PEPTIDE = c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3")
+    )
+}
 
-# Create input data with 4 runs, varying fractions (1-3), 2 groups
-set.seed(123)
-input <- data.table(
-  PROTEIN = rep(c("P1", "P2", "P3"), each = 48),  # 3 proteins
-  PEPTIDE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3"), each = 48),
-  TRANSITION = rep("NA_NA", 144),
-  FEATURE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3_NA_NA", "AAAAAAAAAAAAVSR_2_NA_NA", "AAAAAAAAAAAAVSRD_3_NA_NA"), each = 48),
-  LABEL = rep("L", 144),
-  GROUP_ORIGINAL = rep(rep(c("Control", "Treatment"), each = 24), 3),
-  SUBJECT_ORIGINAL = rep(rep(c("Control", "Treatment"), each = 24), 3),
-  RUN = rep(rep(1:4, each = 6), 6),  # 4 runs
-  GROUP = rep(rep(c("Control", "Treatment"), each = 24), 3),
-  SUBJECT = rep(rep(c("Control", "Treatment"), each = 24), 3),
-  FRACTION = rep(rep(c(1, 1, 2, 2, 3, 3), 4), 6),  # Fractions 1-3
-  INTENSITY = c(
-    # Standard 1 (AAAAAAAAAAAAAAGAGAGAK_3)
-    150000, 148000, 152000, 149000, 151000, 147000,  # Run 1
-    145000, 143000, 147000, 144000, 146000, 142000,  # Run 2
-    200000, 198000, 202000, 199000, 201000, 197000,  # Run 3
-    195000, 193000, 197000, 194000, 196000, 192000,  # Run 4
-    160000, 158000, 162000, 159000, 161000, 157000,  # Run 1 treatment
-    155000, 153000, 157000, 154000, 156000, 152000,  # Run 2 treatment
-    210000, 208000, 212000, 209000, 211000, 207000,  # Run 3 treatment
-    205000, 203000, 207000, 204000, 206000, 202000,  # Run 4 treatment
-    # Standard 2 (AAAAAAAAAAAAVSR_2)
-    500000, 498000, 502000, 499000, 501000, 497000,
-    495000, 493000, 497000, 494000, 496000, 492000,
-    550000, 548000, 552000, 549000, 551000, 547000,
-    545000, 543000, 547000, 544000, 546000, 542000,
-    510000, 508000, 512000, 509000, 511000, 507000,
-    505000, 503000, 507000, 504000, 506000, 502000,
-    560000, 558000, 562000, 559000, 561000, 557000,
-    555000, 553000, 557000, 554000, 556000, 552000,
-    # Standard 3 (AAAAAAAAAAAAVSRD_3)
-    250000, 248000, 252000, 249000, 251000, 247000,
-    245000, 243000, 247000, 244000, 246000, 242000,
-    300000, 298000, 302000, 299000, 301000, 297000,
-    295000, 293000, 297000, 294000, 296000, 292000,
-    260000, 258000, 262000, 259000, 261000, 257000,
-    255000, 253000, 257000, 254000, 256000, 252000,
-    310000, 308000, 312000, 309000, 311000, 307000,
-    305000, 303000, 307000, 304000, 306000, 302000
-  ),
-  ANOMALYSCORES = rep(NA, 144),
-  originalRUN = rep(paste0("Run", rep(1:4, each = 6)), 6)
-)
+create_test_input <- function(standard_intensities, peptide2_intensities, peptide3_intensities) {
+    # Constants
+    n_proteins <- 3
+    n_runs <- 48
+    n_subjects <- 4
+    n_fractions <- 6
+    
+    # Create base structure
+    input <- data.table::data.table(
+        PROTEIN = rep(c("P1", "P2", "P3"), each = n_runs),
+        PEPTIDE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3"), 
+                      each = n_runs),
+        TRANSITION = rep("NA_NA", n_proteins * n_runs),
+        FEATURE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3_NA_NA", 
+                        "AAAAAAAAAAAAVSR_2_NA_NA", 
+                        "AAAAAAAAAAAAVSRD_3_NA_NA"), 
+                      each = n_runs),
+        LABEL = rep("L", n_proteins * n_runs),
+        GROUP_ORIGINAL = rep(rep(c("Control", "Treatment"), each = n_runs/2), n_proteins),
+        SUBJECT_ORIGINAL = rep(paste0("Subject", rep(1:n_subjects, each = n_fractions)), 
+                               n_proteins * 2),
+        RUN = rep(1:n_runs, n_proteins),
+        GROUP = rep(rep(c("Control", "Treatment"), each = n_runs/2), n_proteins),
+        SUBJECT = rep(paste0("Subject", rep(1:n_subjects, each = n_fractions)), 
+                      n_proteins * 2),
+        FRACTION = rep(rep(1:n_fractions, n_subjects), n_proteins * 2),
+        INTENSITY = c(standard_intensities, peptide2_intensities, peptide3_intensities),
+        ANOMALYSCORES = rep(NA, n_proteins * n_runs),
+        originalRUN = rep(paste0("Run", 1:n_runs), n_proteins)
+    )
+    
+    input[, ABUNDANCE := log2(INTENSITY)]
+    return(input)
+}
 
-# Calculate ABUNDANCE as log2(INTENSITY)
-input[, ABUNDANCE := log2(INTENSITY)]
+# Test 1: Standards with different intensities between groups -------------------
+test_different_group_intensities <- function() {
+    peptide_dict <- create_peptide_dictionary()
+    standards <- c("AAAAAAAAAAAAAAGAGAGAK")
+    
+    # Control group: 262144, Treatment group: 524288
+    standard_intensities <- c(
+        rep(262144, 24),  # Control (runs 1-24)
+        rep(524288, 24)   # Treatment (runs 25-48)
+    )
+    
+    # Non-standard peptides: all 262144
+    peptide2_intensities <- rep(262144, 48)
+    peptide3_intensities <- rep(262144, 48)
+    
+    input <- create_test_input(standard_intensities, peptide2_intensities, peptide3_intensities)
+    output <- MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards)
+    
+    # Verify normalization: Control runs should be shifted up, Treatment runs shifted down
+    control_runs <- 1:24
+    treatment_runs <- 25:48
+    
+    # Check Control group (shifted up to match treatment standard)
+    control_abundance <- output[RUN %in% control_runs & 
+                                    !is.na(ABUNDANCE) & 
+                                    !grepl(standards, PEPTIDE)]$ABUNDANCE
+    expect_true(all(abs(control_abundance - 18.5) < 1e-10),
+                info = "Control group non-standard peptides should be normalized to 18.5")
+    
+    # Check Treatment group (shifted down to match control standard)
+    treatment_abundance <- output[RUN %in% treatment_runs & 
+                                      !is.na(ABUNDANCE) & 
+                                      !grepl(standards, PEPTIDE)]$ABUNDANCE
+    expect_true(all(abs(treatment_abundance - 17.5) < 1e-10),
+                info = "Treatment group non-standard peptides should be normalized to 17.5")
+}
 
-# Add some missing values for realism
-input[c(5, 12, 23, 34, 45, 67, 89, 111), ABUNDANCE := NA]
-input[c(5, 12, 23, 34, 45, 67, 89, 111), INTENSITY := NA]
+# Test 2: Standards with alternating intensities within fractions ---------------
+test_alternating_intensities_within_fractions <- function() {
+    peptide_dict <- create_peptide_dictionary()
+    standards <- c("AAAAAAAAAAAAAAGAGAGAK")
+    
+    # Standard alternates between 262144 and 524288 within each fraction
+    standard_intensities <- rep(c(262144, 524288), 24)
+    
+    # Non-standard peptides: all 262144
+    peptide2_intensities <- rep(262144, 48)
+    peptide3_intensities <- rep(262144, 48)
+    
+    input <- create_test_input(standard_intensities, peptide2_intensities, peptide3_intensities)
+    output <- MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards)
+    
+    # When standards vary within fractions but average to same level,
+    # no net normalization should occur
+    all_runs <- 1:48
+    normalized_abundance <- output[RUN %in% all_runs & 
+                                       !is.na(ABUNDANCE) & 
+                                       !grepl(standards, PEPTIDE)]$ABUNDANCE
+    
+    expect_true(all(abs(normalized_abundance - 18) < 1e-10),
+                info = "No normalization should occur when standard averages are equal across fractions")
+}
 
-# Test the function
-output <- MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards)
+# Run tests ---------------------------------------------------------------------
+test_different_group_intensities()
+test_alternating_intensities_within_fractions()
\ No newline at end of file

From e8ec4064e6f64036058aad0b1c3857702b51cfe5 Mon Sep 17 00:00:00 2001
From: Tony Wu <wu.anthon@northeastern.edu>
Date: Thu, 29 Jan 2026 15:01:21 -0500
Subject: [PATCH 6/8] enable multiple precursors for same peptide in unit
 tests

---
 inst/tinytest/test_utils_normalize.R | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/inst/tinytest/test_utils_normalize.R b/inst/tinytest/test_utils_normalize.R
index ba6773e0..9a44c393 100644
--- a/inst/tinytest/test_utils_normalize.R
+++ b/inst/tinytest/test_utils_normalize.R
@@ -4,9 +4,9 @@
 
 create_peptide_dictionary <- function() {
     data.table::data.table(
-        PeptideSequence = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSR", "AAAAAAAAAAAAVSRD"),
+        PeptideSequence = c("AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAAAGAGAGAK", "AAAAAAAAAAAAVSRD"),
         PrecursorCharge = c(3, 2, 3),
-        PEPTIDE = c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3")
+        PEPTIDE = c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAAAGAGAGAK_2", "AAAAAAAAAAAAVSRD_3")
     )
 }
 
@@ -19,12 +19,12 @@ create_test_input <- function(standard_intensities, peptide2_intensities, peptid
     
     # Create base structure
     input <- data.table::data.table(
-        PROTEIN = rep(c("P1", "P2", "P3"), each = n_runs),
-        PEPTIDE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAVSR_2", "AAAAAAAAAAAAVSRD_3"), 
+        PROTEIN = rep(c("P1", "P1", "P3"), each = n_runs),
+        PEPTIDE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3", "AAAAAAAAAAAAAAGAGAGAK_2", "AAAAAAAAAAAAVSRD_3"), 
                       each = n_runs),
         TRANSITION = rep("NA_NA", n_proteins * n_runs),
         FEATURE = rep(c("AAAAAAAAAAAAAAGAGAGAK_3_NA_NA", 
-                        "AAAAAAAAAAAAVSR_2_NA_NA", 
+                        "AAAAAAAAAAAAAAGAGAGAK_2_NA_NA", 
                         "AAAAAAAAAAAAVSRD_3_NA_NA"), 
                       each = n_runs),
         LABEL = rep("L", n_proteins * n_runs),
@@ -57,10 +57,9 @@ test_different_group_intensities <- function() {
     )
     
     # Non-standard peptides: all 262144
-    peptide2_intensities <- rep(262144, 48)
     peptide3_intensities <- rep(262144, 48)
     
-    input <- create_test_input(standard_intensities, peptide2_intensities, peptide3_intensities)
+    input <- create_test_input(standard_intensities, standard_intensities, peptide3_intensities)
     output <- MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards)
     
     # Verify normalization: Control runs should be shifted up, Treatment runs shifted down
@@ -91,10 +90,9 @@ test_alternating_intensities_within_fractions <- function() {
     standard_intensities <- rep(c(262144, 524288), 24)
     
     # Non-standard peptides: all 262144
-    peptide2_intensities <- rep(262144, 48)
     peptide3_intensities <- rep(262144, 48)
     
-    input <- create_test_input(standard_intensities, peptide2_intensities, peptide3_intensities)
+    input <- create_test_input(standard_intensities, standard_intensities, peptide3_intensities)
     output <- MSstats:::.normalizeGlobalStandards(input, peptide_dict, standards)
     
     # When standards vary within fractions but average to same level,

From a8cf7d7578f7ea289232f0382847f83d524dc92b Mon Sep 17 00:00:00 2001
From: Tony Wu <wu.anthon@northeastern.edu>
Date: Thu, 29 Jan 2026 15:20:34 -0500
Subject: [PATCH 7/8] remove dead code

---
 R/utils_normalize.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/R/utils_normalize.R b/R/utils_normalize.R
index ce6d2577..13563717 100644
--- a/R/utils_normalize.R
+++ b/R/utils_normalize.R
@@ -194,7 +194,6 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s
     Standard = FRACTION = LABEL = ABUNDANCE = RUN = GROUP = NULL
     
     proteins = as.character(unique(input$PROTEIN))
-    means_by_standard = unique(input[, list(RUN)])
     input_with_peptides <- merge(input, peptides_dict, by = "PEPTIDE", all.x = TRUE)
     standards_data <- input_with_peptides[
         (PeptideSequence %in% standards | PROTEIN %in% standards) & 

From 94865cc2073661b4aa3225e62d588a068b94cc06 Mon Sep 17 00:00:00 2001
From: Tony Wu <wu.anthon@northeastern.edu>
Date: Thu, 29 Jan 2026 15:27:12 -0500
Subject: [PATCH 8/8] remove more dead code

---
 R/utils_normalize.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/R/utils_normalize.R b/R/utils_normalize.R
index 13563717..fe2c0f17 100644
--- a/R/utils_normalize.R
+++ b/R/utils_normalize.R
@@ -192,8 +192,7 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s
 .normalizeGlobalStandards = function(input, peptides_dict, standards) {
     PeptideSequence = PEPTIDE = PROTEIN = median_by_fraction = NULL
     Standard = FRACTION = LABEL = ABUNDANCE = RUN = GROUP = NULL
-    
-    proteins = as.character(unique(input$PROTEIN))
+
     input_with_peptides <- merge(input, peptides_dict, by = "PEPTIDE", all.x = TRUE)
     standards_data <- input_with_peptides[
         (PeptideSequence %in% standards | PROTEIN %in% standards) &